diff -pruN 0.23.2-5/conftest.py 1.1.1-1/conftest.py
--- 0.23.2-5/conftest.py	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/conftest.py	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,6 @@
+# Even if empty this file is useful so that when running from the root folder
+# ./sklearn is added to sys.path by pytest. See
+# https://docs.pytest.org/en/latest/explanation/pythonpath.html for more
+# details. For example, this allows to build extensions in place and run pytest
+# doc/modules/clustering.rst and use sklearn from the local folder rather than
+# the one from site-packages.
diff -pruN 0.23.2-5/COPYING 1.1.1-1/COPYING
--- 0.23.2-5/COPYING	2020-08-04 12:12:58.852675200 +0000
+++ 1.1.1-1/COPYING	2022-05-19 12:16:26.428781300 +0000
@@ -1,32 +1,29 @@
-New BSD License
+BSD 3-Clause License
 
-Copyright (c) 2007–2020 The scikit-learn developers.
+Copyright (c) 2007-2021 The scikit-learn developers.
 All rights reserved.
 
-
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 
-  a. Redistributions of source code must retain the above copyright notice,
-     this list of conditions and the following disclaimer.
-  b. Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-  c. Neither the name of the Scikit-learn Developers  nor the names of
-     its contributors may be used to endorse or promote products
-     derived from this software without specific prior written
-     permission. 
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
 
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
-
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff -pruN 0.23.2-5/.coveragerc 1.1.1-1/.coveragerc
--- 0.23.2-5/.coveragerc	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/.coveragerc	2022-05-19 12:16:26.428781300 +0000
@@ -0,0 +1,9 @@
+[run]
+branch = True
+source = sklearn
+parallel = True
+omit =
+    */sklearn/externals/*
+    */sklearn/_build_utils/*
+    */benchmarks/*
+    **/setup.py
diff -pruN 0.23.2-5/debian/changelog 1.1.1-1/debian/changelog
--- 0.23.2-5/debian/changelog	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/changelog	2022-05-23 15:38:39.000000000 +0000
@@ -1,3 +1,73 @@
+scikit-learn (1.1.1-1) unstable; urgency=medium
+
+  * Team upload.
+  * New upstream release.
+  * Refresh patches.
+
+ -- Chiara Marmo <marmochiaskl@gmail.com>  Mon, 23 May 2022 21:08:39 +0530
+
+scikit-learn (1.0.2-1) unstable; urgency=medium
+
+  * Team upload.
+
+  [ Drew Parsons ]
+  * run tests without colour to make build and debci logs more legible
+  * debian/tests Depends: python3-setuptools
+
+  [ Chiara Marmo ]
+  * New upstream release. Update Standards-Version.
+
+  [ Andreas Tille ]
+  * Rename debian/.gitlab-ci.yml to debian/salsa-ci.yml
+  * Relax test tolerance
+  * Revert nocheck option from NMU since the check will pass now hopefully
+  * Fix files location in d/copyright
+  * Drop outdated debian/TODO
+  * Update debian/source/lintian-overrides
+  * bootstrap-responsive*.css is not vendored by scikit-learn docs any more
+    Closes: #924213
+
+ -- Andreas Tille <tille@debian.org>  Tue, 15 Feb 2022 17:44:37 +0100
+
+scikit-learn (1.0.1-1.1) unstable; urgency=medium
+
+  * Non-maintainer upload.
+  * To advance the python3.10 transition, disable the tests during build
+    for now (see: #1003165)
+
+ -- Paul Gevers <elbrus@debian.org>  Fri, 21 Jan 2022 22:38:09 +0100
+
+scikit-learn (1.0.1-1) unstable; urgency=medium
+
+  * Team upload.
+
+  [ Steffen Moeller ]
+  * Created d/u/metadata
+
+  [ Drew Parsons ]
+  * New upstream release. Closes: #999528.
+    - applies or deprecates debian patches:
+       disable-doctest.patch
+       Skip-two-tests-when-SciPy-1.5.patch
+       Use-assert_allclose-instead-of-equality-in-FP-comparison.patch
+       Change-expected-result-type-to-pass-on-32-bit.patch
+       Increase-tolerance-for-RANSACRegressor-test.patch
+       fix_scipy1.6_PR18711.patch
+       skip_test_ELKI_PR19115.patch
+  * Build-Depends-Indep: python3-sphinx-prompt,
+    python3-sphinxext-opengraph
+  * Versioned Build-Depends: cython3 (>= 0.28.5~),
+     python3-numpy (>= 1.14.6~), python3-scipy (>= 1.1.10~)
+  * Drop Build-Depends: libatlas-base-dev.
+    Build against generic libblas-dev, use specific optimized blas at
+    runtime. Closes: #918570.
+  * debian patch fix_test_calibration_labels_PR21697.patch applies
+    upstream PR#21697 to fix label access in test_calibration
+  * skip test_load_boston_alternative at build-time (debian builds
+    must not access the network)
+
+ -- Drew Parsons <dparsons@debian.org>  Sat, 27 Nov 2021 15:21:20 +0100
+
 scikit-learn (0.23.2-5) unstable; urgency=medium
 
   * Team upload.
diff -pruN 0.23.2-5/debian/control 1.1.1-1/debian/control
--- 0.23.2-5/debian/control	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/control	2022-05-23 15:38:12.000000000 +0000
@@ -8,23 +8,26 @@ Uploaders: Yaroslav Halchenko <debian@on
 Build-Depends: debhelper-compat (= 13),
                dh-python,
                python3-all-dev,
-               cython3,
+               cython3 (>= 0.28.5~),
                python3-setuptools,
-               python3-numpy, python3-scipy,
+               python3-numpy (>= 1.14.6~),
+               python3-scipy (>= 1.1.10~),
                python3-nose, python3-pytest,
                python3-matplotlib,
                python3-joblib (>= 0.9.2),
                python3-threadpoolctl (>= 2.0.0),
                libsvm-dev (>= 2.84.0),
                libblas-dev | libblas.so,
-               libatlas-base-dev,
                python3-pandas <!nocheck>,
                python3-pil <!nocheck>,
 Build-Depends-Indep: python3-sphinx (>= 1.0),
                      python3-sphinx-gallery,
+                     python3-sphinx-prompt,
+                     python3-sphinxext-opengraph,
                      python3-numpydoc (>= 0.9),
                      python3-pandas,
                      python3-skimage,
+                     python3-seaborn,
                      python3-pil,
                      librsvg2-bin,
                      graphviz,
@@ -34,10 +37,10 @@ Build-Depends-Indep: python3-sphinx (>=
                      python-matplotlib-doc,
                      python-pandas-doc,
 Rules-Requires-Root: no
-Standards-Version: 4.5.1
+Standards-Version: 4.6.1
 Vcs-Browser: https://salsa.debian.org/science-team/scikit-learn
 Vcs-Git: https://salsa.debian.org/science-team/scikit-learn.git
-Homepage: http://scikit-learn.sourceforge.net
+Homepage: https://scikit-learn.org
 
 Package: python3-sklearn
 Architecture: all
diff -pruN 0.23.2-5/debian/copyright 1.1.1-1/debian/copyright
--- 0.23.2-5/debian/copyright	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/copyright	2022-05-23 15:38:12.000000000 +0000
@@ -1,6 +1,6 @@
 Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
 Upstream-Name: scikit-learn
-Upstream-Contact: Fabian Pedregosa <fabian.pedregosa@inria.fr>
+Upstream-Contact: https://github.com/scikit-learn/scikit-learn/issues
 Source: http://github.com/scikit-learn/scikit-learn
 
 Files: *
@@ -19,11 +19,7 @@ Copyright: 2000-2010 Chih-Chung Chang
 License: BSD-3-Clause
 Comment: Partial fork of the LIBSVM package.
 
-Files: doc/themes/scikit-learn/static/jquery.maphilight.js
-Copyright: 2008 David Lynch, http://davidlynch.org/
-License: Expat
-
-Files: doc/themes/scikit-learn/static/*/bootstrap*
+Files: doc/themes/scikit-learn-modern/static/*/bootstrap*
 Copyright: 2012 Twitter, Inc
 License: Apache-2.0
 
@@ -40,10 +36,6 @@ Files: doc/themes/scikit-learn-modern/st
 Copyright: Copyright 2011-2019 The Bootstrap Authors
 License: Expat
 
-Files: doc/themes/scikit-learn/static/nature.css_t
-Copyright: 2007-2010 Sphinx team
-License: BSD-3-Clause
-
 Files: sklearn/covariance/_graph_lasso.py
 Copyright: INRIA
 License: BSD-3-Clause
diff -pruN 0.23.2-5/debian/patches/Change-expected-result-type-to-pass-on-32-bit.patch 1.1.1-1/debian/patches/Change-expected-result-type-to-pass-on-32-bit.patch
--- 0.23.2-5/debian/patches/Change-expected-result-type-to-pass-on-32-bit.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/Change-expected-result-type-to-pass-on-32-bit.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,30 +0,0 @@
-From: Christian Kastner <ckk@debian.org>
-Date: Tue, 4 Aug 2020 13:47:28 +0200
-Subject: Change expected result type to pass on 32-bit
-
-Origin: upstream, https://github.com/scikit-learn/scikit-learn/issues/18084
----
- sklearn/datasets/tests/test_base.py | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py
-index 3ec6007..c53dcfb 100644
---- a/sklearn/datasets/tests/test_base.py
-+++ b/sklearn/datasets/tests/test_base.py
-@@ -234,12 +234,12 @@ def test_load_breast_cancer():
- 
- 
- @pytest.mark.parametrize("loader_func, data_dtype, target_dtype", [
--    (load_breast_cancer, np.float64, np.int64),
-+    (load_breast_cancer, np.float64, int),
-     (load_diabetes, np.float64, np.float64),
--    (load_digits, np.float64, np.int64),
--    (load_iris, np.float64, np.int64),
-+    (load_digits, np.float64, int),
-+    (load_iris, np.float64, int),
-     (load_linnerud, np.float64, np.float64),
--    (load_wine, np.float64, np.int64),
-+    (load_wine, np.float64, int),
- ])
- def test_toy_dataset_as_frame(loader_func, data_dtype, target_dtype):
-     default_result = loader_func()
diff -pruN 0.23.2-5/debian/patches/Disable-BinderHub-links.patch 1.1.1-1/debian/patches/Disable-BinderHub-links.patch
--- 0.23.2-5/debian/patches/Disable-BinderHub-links.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/Disable-BinderHub-links.patch	2022-05-23 15:38:12.000000000 +0000
@@ -7,22 +7,20 @@ Forwarded: not needed
  doc/conf.py | 8 --------
  1 file changed, 8 deletions(-)
 
-diff --git a/doc/conf.py b/doc/conf.py
-index 701fcee..57fe0bc 100644
 --- a/doc/conf.py
 +++ b/doc/conf.py
-@@ -324,14 +324,6 @@ sphinx_gallery_conf = {
-     'examples_dirs': ['../examples'],
-     'gallery_dirs': ['auto_examples'],
-     'subsection_order': SubSectionTitleOrder('../examples'),
--    'binder': {
--        'org': 'scikit-learn',
--        'repo': 'scikit-learn',
--        'binderhub_url': 'https://mybinder.org',
--        'branch': binder_branch,
--        'dependencies': './binder/requirements.txt',
--        'use_jupyter_lab': True
+@@ -388,14 +388,6 @@
+     "gallery_dirs": ["auto_examples"],
+     "subsection_order": SubSectionTitleOrder("../examples"),
+     "within_subsection_order": SKExampleTitleSortKey,
+-    "binder": {
+-        "org": "scikit-learn",
+-        "repo": "scikit-learn",
+-        "binderhub_url": "https://mybinder.org",
+-        "branch": binder_branch,
+-        "dependencies": "./binder/requirements.txt",
+-        "use_jupyter_lab": True,
 -    },
      # avoid generating too many cross links
-     'inspect_global_variables': False,
-     'remove_config_comments': True,
+     "inspect_global_variables": False,
+     "remove_config_comments": True,
diff -pruN 0.23.2-5/debian/patches/disable-doctest.patch 1.1.1-1/debian/patches/disable-doctest.patch
--- 0.23.2-5/debian/patches/disable-doctest.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/disable-doctest.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,21 +0,0 @@
-From: yangfl <yangfl@users.noreply.github.com>
-Date: Fri, 19 Oct 2018 23:42:39 +0800
-Subject: Disable doctest to prevent Python 2 doctest failure
-
-===================================================================
----
- setup.cfg | 1 -
- 1 file changed, 1 deletion(-)
-
-diff --git a/setup.cfg b/setup.cfg
-index f086993..cce94be 100644
---- a/setup.cfg
-+++ b/setup.cfg
-@@ -11,7 +11,6 @@ addopts =
-     --ignore doc
-     --ignore examples
-     --ignore maint_tools
--    --doctest-modules
-     --disable-pytest-warnings
-     -rxXs
- 
diff -pruN 0.23.2-5/debian/patches/fix_scipy1.6_PR18711.patch 1.1.1-1/debian/patches/fix_scipy1.6_PR18711.patch
--- 0.23.2-5/debian/patches/fix_scipy1.6_PR18711.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/fix_scipy1.6_PR18711.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,64 +0,0 @@
-From eb685d534f06c8fe45de1c29db7d2ff58e011100 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Juan=20Carlos=20Alfaro=20Jim=C3=A9nez?=
- <JuanCarlos.Alfaro@uclm.es>
-Date: Fri, 30 Oct 2020 09:20:36 +0100
-Subject: [PATCH 01/10] CI Fix Travis CRON job failures
-
----
- sklearn/metrics/_ranking.py            | 2 +-
- sklearn/metrics/cluster/_supervised.py | 4 ++--
- sklearn/metrics/tests/test_ranking.py  | 2 +-
- sklearn/naive_bayes.py                 | 2 +-
- sklearn/utils/estimator_checks.py      | 2 +-
- 5 files changed, 6 insertions(+), 6 deletions(-)
-
-Index: scikit-learn/sklearn/utils/_seq_dataset.pyx.tp
-===================================================================
---- scikit-learn.orig/sklearn/utils/_seq_dataset.pyx.tp	2021-01-18 02:37:08.698404725 +1100
-+++ scikit-learn/sklearn/utils/_seq_dataset.pyx.tp	2021-01-18 02:37:08.690404605 +1100
-@@ -262,7 +262,7 @@
-         self.feature_indices_ptr = <int *> feature_indices.data
- 
-         self.current_index = -1
--        self.X_stride = X.strides[0] / X.itemsize
-+        self.X_stride = X.strides[0] // X.itemsize
-         self.X_data_ptr = <{{c_type}} *>X.data
-         self.Y_data_ptr = <{{c_type}} *>Y.data
-         self.sample_weight_data = <{{c_type}} *>sample_weights.data
-Index: scikit-learn/sklearn/neighbors/_binary_tree.pxi
-===================================================================
---- scikit-learn.orig/sklearn/neighbors/_binary_tree.pxi	2021-01-18 02:37:08.698404725 +1100
-+++ scikit-learn/sklearn/neighbors/_binary_tree.pxi	2021-01-18 02:37:08.690404605 +1100
-@@ -1071,7 +1071,7 @@
-         # determine number of levels in the tree, and from this
-         # the number of nodes in the tree.  This results in leaf nodes
-         # with numbers of points between leaf_size and 2 * leaf_size
--        self.n_levels = np.log2(fmax(1, (n_samples - 1) / self.leaf_size)) + 1
-+        self.n_levels = int(np.log2(fmax(1, (n_samples - 1) / self.leaf_size)) + 1)
-         self.n_nodes = (2 ** self.n_levels) - 1
- 
-         # allocate arrays for storage
-Index: scikit-learn/sklearn/utils/optimize.py
-===================================================================
---- scikit-learn.orig/sklearn/utils/optimize.py	2021-01-18 02:37:08.698404725 +1100
-+++ scikit-learn/sklearn/utils/optimize.py	2021-01-18 02:37:08.698404725 +1100
-@@ -234,13 +234,18 @@
-     # handle both scipy and scikit-learn solver names
-     if solver == "lbfgs":
-         if result.status != 0:
-+            try:
-+                # The message is already decoded in scipy>=1.6.0
-+                result_message = result.message.decode("latin1")
-+            except AttributeError:
-+                result_message = result.message
-             warning_msg = (
-                 "{} failed to converge (status={}):\n{}.\n\n"
-                 "Increase the number of iterations (max_iter) "
-                 "or scale the data as shown in:\n"
-                 "    https://scikit-learn.org/stable/modules/"
-                 "preprocessing.html"
--            ).format(solver, result.status, result.message.decode("latin1"))
-+            ).format(solver, result.status, result_message)
-             if extra_warning_msg is not None:
-                 warning_msg += "\n" + extra_warning_msg
-             warnings.warn(warning_msg, ConvergenceWarning, stacklevel=2)
diff -pruN 0.23.2-5/debian/patches/git-revision-cmd.patch 1.1.1-1/debian/patches/git-revision-cmd.patch
--- 0.23.2-5/debian/patches/git-revision-cmd.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/git-revision-cmd.patch	2022-05-23 15:38:12.000000000 +0000
@@ -9,17 +9,18 @@ Last-Update: 2020-02-13
  doc/sphinxext/github_link.py | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)
 
-diff --git a/doc/sphinxext/github_link.py b/doc/sphinxext/github_link.py
-index 1592b26..c9a441e 100644
---- a/doc/sphinxext/github_link.py
-+++ b/doc/sphinxext/github_link.py
-@@ -5,7 +5,8 @@ import os
+Index: scikit-learn/doc/sphinxext/github_link.py
+===================================================================
+--- scikit-learn.orig/doc/sphinxext/github_link.py	2021-11-27 09:53:40.436505172 +0100
++++ scikit-learn/doc/sphinxext/github_link.py	2021-11-27 09:53:58.837630439 +0100
+@@ -5,8 +5,8 @@
  import sys
  from functools import partial
  
--REVISION_CMD = 'git rev-parse --short HEAD'
-+#REVISION_CMD = 'git rev-parse --short HEAD'
+-REVISION_CMD = "git rev-parse --short HEAD"
+-
++#REVISION_CMD = "git rev-parse --short HEAD"
 +REVISION_CMD = 'cat ../debian/github-revision.txt'
  
- 
  def _get_git_revision():
+     try:
diff -pruN 0.23.2-5/debian/patches/Increase-tolerance-for-RANSACRegressor-test.patch 1.1.1-1/debian/patches/Increase-tolerance-for-RANSACRegressor-test.patch
--- 0.23.2-5/debian/patches/Increase-tolerance-for-RANSACRegressor-test.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/Increase-tolerance-for-RANSACRegressor-test.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,19 +0,0 @@
-From: Christian Kastner <ckk@debian.org>
-Date: Tue, 4 Aug 2020 22:34:07 +0200
-Subject: Increase tolerance for RANSACRegressor test
-
-Origin: upstream, https://github.com/scikit-learn/scikit-learn/pull/18082
----
- sklearn/linear_model/tests/test_ransac.py | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py
-index 3710f38..d4c83c0 100644
---- a/sklearn/linear_model/tests/test_ransac.py
-+++ b/sklearn/linear_model/tests/test_ransac.py
-@@ -512,4 +512,4 @@ def test_ransac_final_model_fit_sample_weight():
-         sample_weight=sample_weight[mask_samples]
-     )
- 
--    assert_allclose(ransac.estimator_.coef_, final_model.coef_)
-+    assert_allclose(ransac.estimator_.coef_, final_model.coef_, atol=1e-12)
diff -pruN 0.23.2-5/debian/patches/no-buttons-js.patch 1.1.1-1/debian/patches/no-buttons-js.patch
--- 0.23.2-5/debian/patches/no-buttons-js.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/no-buttons-js.patch	2022-05-23 15:38:12.000000000 +0000
@@ -7,11 +7,11 @@ Forwarded: not-needed
  doc/developers/contributing.rst | 1 -
  1 file changed, 1 deletion(-)
 
-diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst
-index e13b685..88bdb17 100644
---- a/doc/developers/contributing.rst
-+++ b/doc/developers/contributing.rst
-@@ -75,7 +75,6 @@ and follows the decision-making process outlined in :ref:`governance`.
+Index: scikit-learn/doc/developers/contributing.rst
+===================================================================
+--- scikit-learn.orig/doc/developers/contributing.rst	2021-11-27 10:05:29.191790856 +0100
++++ scikit-learn/doc/developers/contributing.rst	2021-11-27 10:05:29.183790367 +0100
+@@ -80,7 +80,6 @@
     <a class="github-button" href="https://github.com/scikit-learn/scikit-learn"
     data-icon="octicon-star" data-size="large" data-show-count="true" aria-label="Star
     scikit-learn/scikit-learn on GitHub">Star</a>
diff -pruN 0.23.2-5/debian/patches/no-download-intersphinx-mapping.patch 1.1.1-1/debian/patches/no-download-intersphinx-mapping.patch
--- 0.23.2-5/debian/patches/no-download-intersphinx-mapping.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/no-download-intersphinx-mapping.patch	2022-05-23 15:38:12.000000000 +0000
@@ -7,26 +7,24 @@ Subject: No-downloading intersphinx mapp
  doc/conf.py | 12 +++++-------
  1 file changed, 5 insertions(+), 7 deletions(-)
 
-diff --git a/doc/conf.py b/doc/conf.py
-index 7b90870..6a3e8a8 100644
 --- a/doc/conf.py
 +++ b/doc/conf.py
-@@ -262,13 +262,11 @@ trim_doctests_flags = True
+@@ -304,13 +304,11 @@
+ 
  # intersphinx configuration
  intersphinx_mapping = {
-     'python': ('https://docs.python.org/{.major}'.format(
--        sys.version_info), None),
--    'numpy': ('https://docs.scipy.org/doc/numpy/', None),
--    'scipy': ('https://docs.scipy.org/doc/scipy/reference', None),
--    'matplotlib': ('https://matplotlib.org/', None),
--    'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
--    'joblib': ('https://joblib.readthedocs.io/en/latest/', None),
--    'seaborn': ('https://seaborn.pydata.org/', None),
-+         sys.version_info), '/usr/share/doc/python3/html/objects.inv'),
-+    'numpy': ('https://docs.scipy.org/doc/numpy/', '/usr/share/doc/python-numpy-doc/html/objects.inv'),
-+    'scipy': ('https://docs.scipy.org/doc/scipy/reference', '/usr/share/doc/python-scipy-doc/html/objects.inv'),
-+    'matplotlib': ('https://matplotlib.org/', '/usr/share/doc/python-matplotlib-doc/html/objects.inv'),
-+    'pandas': ('https://matplotlib.org/', '/usr/share/doc/python-pandas-doc/html/objects.inv'),
+-    "python": ("https://docs.python.org/{.major}".format(sys.version_info), None),
+-    "numpy": ("https://numpy.org/doc/stable", None),
+-    "scipy": ("https://docs.scipy.org/doc/scipy/", None),
+-    "matplotlib": ("https://matplotlib.org/", None),
+-    "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
+-    "joblib": ("https://joblib.readthedocs.io/en/latest/", None),
+-    "seaborn": ("https://seaborn.pydata.org/", None),
++    "python": ("https://docs.python.org/{.major}".format(sys.version_info), '/usr/share/doc/python3/html/objects.inv'),
++    "numpy": ("https://numpy.org/doc/stable", '/usr/share/doc/python-numpy-doc/html/objects.inv'),
++    "scipy": ("https://docs.scipy.org/doc/scipy/", '/usr/share/doc/python-scipy-doc/html/objects.inv'),
++    "matplotlib": ("https://matplotlib.org/", '/usr/share/doc/python-matplotlib-doc/html/objects.inv'),
++    "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", '/usr/share/doc/python-pandas-doc/html/objects.inv'),
  }
  
  v = parse(release)
diff -pruN 0.23.2-5/debian/patches/NumPy-div-by-zero-check.patch 1.1.1-1/debian/patches/NumPy-div-by-zero-check.patch
--- 0.23.2-5/debian/patches/NumPy-div-by-zero-check.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/NumPy-div-by-zero-check.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,99 +0,0 @@
-From: Christian Kastner <ckk@debian.org>
-Date: Thu, 30 Jul 2020 16:58:06 +0200
-Subject: NumPy div-by-zero check
-
-Skip some tests where NumPy does not report FP division-by-zero, which
-has been observed on armel (soft-float).
-
-This is not a bug in NumPy, but rather originates with the underlying
-platform. See also
- * https://lists.debian.org/debian-arm/2020/02/msg00076.html.
- * https://github.com/numpy/numpy/issues/15562
----
- sklearn/feature_extraction/tests/test_text.py | 8 ++++++++
- sklearn/metrics/tests/test_classification.py  | 7 +++++++
- sklearn/metrics/tests/test_ranking.py         | 6 ++++++
- 3 files changed, 21 insertions(+)
-
-diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py
-index 4770080..d798335 100644
---- a/sklearn/feature_extraction/tests/test_text.py
-+++ b/sklearn/feature_extraction/tests/test_text.py
-@@ -1,6 +1,7 @@
- # -*- coding: utf-8 -*-
- from collections.abc import Mapping
- import re
-+import warnings
- 
- import pytest
- from scipy import sparse
-@@ -76,6 +77,12 @@ def lazy_analyze(s):
-     return ['the_ultimate_feature']
- 
- 
-+# This has been observed on 32-bit ARM with soft float, for example
-+with warnings.catch_warnings(record=True) as w:
-+    1. / np.array([0.])
-+    numpy_lacks_div0_warning = len(w) == 0
-+
-+
- def test_strip_accents():
-     # check some classical latin accentuated symbols
-     a = 'àáâãäåçèéêë'
-@@ -362,6 +369,7 @@ def test_tf_idf_smoothing():
-     assert (tfidf >= 0).all()
- 
- 
-+@pytest.mark.skipif(numpy_lacks_div0_warning, reason='No div_by_zero warning')
- def test_tfidf_no_smoothing():
-     X = [[1, 1, 1],
-          [1, 1, 0],
-diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
-index 1f959d9..bdecf2d 100644
---- a/sklearn/metrics/tests/test_classification.py
-+++ b/sklearn/metrics/tests/test_classification.py
-@@ -99,6 +99,12 @@ def make_prediction(dataset=None, binary=False):
-     return y_true, y_pred, probas_pred
- 
- 
-+# This has been observed on 32-bit ARM with soft float, for example
-+with warnings.catch_warnings(record=True) as w:
-+    1. / np.array([0.])
-+    numpy_lacks_div0_warning = len(w) == 0
-+
-+
- ###############################################################################
- # Tests
- 
-@@ -685,6 +691,7 @@ def test_matthews_corrcoef():
-                                               sample_weight=mask), 0.)
- 
- 
-+@pytest.mark.skipif(numpy_lacks_div0_warning, reason='No div_by_zero warning')
- def test_matthews_corrcoef_multiclass():
-     rng = np.random.RandomState(0)
-     ord_a = ord('a')
-diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
-index a66ff95..423526f 100644
---- a/sklearn/metrics/tests/test_ranking.py
-+++ b/sklearn/metrics/tests/test_ranking.py
-@@ -78,6 +78,11 @@ def make_prediction(dataset=None, binary=False):
-     y_true = y[half:]
-     return y_true, y_pred, probas_pred
- 
-+# This has been observed on 32-bit ARM with soft float, for example
-+with warnings.catch_warnings(record=True) as w:
-+    1. / np.array([0.])
-+    numpy_lacks_div0_warning = len(w) == 0
-+
- 
- ###############################################################################
- # Tests
-@@ -755,6 +760,7 @@ def test_precision_recall_curve_errors():
-         precision_recall_curve([0, 1, 2], [[0.0], [1.0], [1.0]])
- 
- 
-+@pytest.mark.skipif(numpy_lacks_div0_warning, reason='No div_by_zero warning')
- def test_precision_recall_curve_toydata():
-     with np.errstate(all="raise"):
-         # Binary classification
diff -pruN 0.23.2-5/debian/patches/parallel-cythonize.patch 1.1.1-1/debian/patches/parallel-cythonize.patch
--- 0.23.2-5/debian/patches/parallel-cythonize.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/parallel-cythonize.patch	2022-05-23 15:38:12.000000000 +0000
@@ -10,23 +10,18 @@ Last-Update: 2020-02-13
  sklearn/_build_utils/__init__.py | 9 +--------
  1 file changed, 1 insertion(+), 8 deletions(-)
 
-diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py
-index 8c57982..cc98a6b 100644
 --- a/sklearn/_build_utils/__init__.py
 +++ b/sklearn/_build_utils/__init__.py
-@@ -63,14 +63,7 @@ def cythonize_extensions(top_path, config):
+@@ -60,11 +60,7 @@
      #   to actually build the compiled extensions with OpenMP flags if needed.
      sklearn._OPENMP_SUPPORTED = check_openmp_support()
  
 -    n_jobs = 1
 -    with contextlib.suppress(ImportError):
 -        import joblib
--        if LooseVersion(joblib.__version__) > LooseVersion("0.13.0"):
--            # earlier joblib versions don't account for CPU affinity
--            # constraints, and may over-estimate the number of available
--            # CPU particularly in CI (cf loky#114)
--            n_jobs = joblib.cpu_count()
+-
+-        n_jobs = joblib.cpu_count()
 +    n_jobs = int(os.environ.get('JOBS', 1))
  
-     config.ext_modules = cythonize(
-         config.ext_modules,
+     # Additional checks for Cython
+     cython_enable_debug_directives = (
diff -pruN 0.23.2-5/debian/patches/relax_test_tolerance.patch 1.1.1-1/debian/patches/relax_test_tolerance.patch
--- 0.23.2-5/debian/patches/relax_test_tolerance.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/debian/patches/relax_test_tolerance.patch	2022-05-23 15:38:12.000000000 +0000
@@ -0,0 +1,30 @@
+Author: Andreas Tille <tille@debian.org>
+Last-Update: Tue, 15 Feb 2022 13:33:35 +0100
+Origin: https://salsa.debian.org/science-team/scikit-learn/-/jobs/2469653
+Description: Relax test tolerance - this patch is needed at least for i386 (not for amd64)
+ >       assert_allclose(rbm_64.components_, rbm_32.components_, rtol=1e-03, atol=0)
+ E       AssertionError: 
+ E       Not equal to tolerance rtol=0.001, atol=0
+ E       
+ E       Mismatched elements: 1 / 1024 (0.0977%)
+ E       Max absolute difference: 1.64902041e-07
+ E       Max relative difference: 0.00120168
+ E        x: array([[-0.314464, -0.269352, -0.050074, ...,  0.032314, -0.184839,
+ E               -0.34867 ],
+ E              [-0.315646, -0.263143, -0.079185, ...,  0.054781, -0.178217,...
+ E        y: array([[-0.314464, -0.269352, -0.050074, ...,  0.032314, -0.184839,
+ E               -0.34867 ],
+ E              [-0.315646, -0.263143, -0.079185, ...,  0.054781, -0.178217,...
+ sklearn/neural_network/tests/test_rbm.py:239: AssertionError
+
+--- a/sklearn/neural_network/tests/test_rbm.py
++++ b/sklearn/neural_network/tests/test_rbm.py
+@@ -236,7 +236,7 @@
+     assert_allclose(
+         rbm_64.intercept_visible_, rbm_32.intercept_visible_, rtol=1e-05, atol=0
+     )
+-    assert_allclose(rbm_64.components_, rbm_32.components_, rtol=1e-03, atol=0)
++    assert_allclose(rbm_64.components_, rbm_32.components_, rtol=1e-02, atol=1e-06)
+     assert_allclose(rbm_64.h_samples_, rbm_32.h_samples_)
+ 
+ 
diff -pruN 0.23.2-5/debian/patches/remove-google-analytics.patch 1.1.1-1/debian/patches/remove-google-analytics.patch
--- 0.23.2-5/debian/patches/remove-google-analytics.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/remove-google-analytics.patch	2022-05-23 15:38:12.000000000 +0000
@@ -17,28 +17,24 @@ Last-Update: 2013-11-24
  doc/themes/scikit-learn/theme.conf | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)
 
-diff --git a/doc/conf.py b/doc/conf.py
-index 539229a..7b90870 100644
 --- a/doc/conf.py
 +++ b/doc/conf.py
-@@ -142,7 +142,7 @@ html_theme = 'scikit-learn-modern'
- # Theme options are theme-specific and customize the look and feel of a theme
+@@ -159,7 +159,7 @@
  # further.  For a list of options available for each theme, see the
  # documentation.
--html_theme_options = {'google_analytics': True,
-+html_theme_options = {'google_analytics': False,
-                       'mathjax_path': mathjax_path}
+ html_theme_options = {
+-    "google_analytics": True,
++    "google_analytics": False,
+     "mathjax_path": mathjax_path,
+     "link_to_live_contributing_page": not parsed_version.is_devrelease,
+ }
+--- a/doc/themes/scikit-learn-modern/theme.conf
++++ b/doc/themes/scikit-learn-modern/theme.conf
+@@ -4,6 +4,6 @@
+ stylesheet = css/theme.css
  
- # Add any paths that contain custom themes here, relative to this directory.
-diff --git a/doc/themes/scikit-learn/theme.conf b/doc/themes/scikit-learn/theme.conf
-index 716b82a..af1c34d 100644
---- a/doc/themes/scikit-learn/theme.conf
-+++ b/doc/themes/scikit-learn/theme.conf
-@@ -6,6 +6,6 @@ pygments_style = tango
  [options]
- oldversion = False
- collapsiblesidebar = True
--google_analytics = True
-+google_analytics = False
- surveybanner = False
- sprintbanner = True
+-google_analytics = true
++google_analytics = false
+ link_to_live_contributing_page = false
+ mathjax_path =
diff -pruN 0.23.2-5/debian/patches/remove-paypal.patch 1.1.1-1/debian/patches/remove-paypal.patch
--- 0.23.2-5/debian/patches/remove-paypal.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/remove-paypal.patch	2022-05-23 15:38:12.000000000 +0000
@@ -10,11 +10,11 @@ Last-Update: 2020-02-13
  doc/about.rst | 26 +++++---------------------
  1 file changed, 5 insertions(+), 21 deletions(-)
 
-diff --git a/doc/about.rst b/doc/about.rst
-index 814a472..bdc4e38 100644
---- a/doc/about.rst
-+++ b/doc/about.rst
-@@ -484,9 +484,11 @@ Donating to the project
+Index: scikit-learn/doc/about.rst
+===================================================================
+--- scikit-learn.orig/doc/about.rst	2021-11-27 09:55:11.086045457 +0100
++++ scikit-learn/doc/about.rst	2021-11-27 09:55:11.082045212 +0100
+@@ -594,9 +594,11 @@
  .......................
  
  If you are interested in donating to the project or to one of our code-sprints,
@@ -29,28 +29,3 @@ index 814a472..bdc4e38 100644
  
  All donations will be handled by `NumFOCUS
  <https://numfocus.org/>`_, a non-profit-organization which is
-@@ -500,24 +502,6 @@ The received donations for the scikit-learn project mostly will go towards
- covering travel-expenses for code sprints, as well as towards the organization
- budget of the project [#f1]_.
- 
--.. raw :: html
--
--    </br></br>
--    <form action="https://www.paypal.com/cgi-bin/webscr" method="post"
--    target="_top">
--    <input type="hidden" name="cmd" value="_s-xclick">
--    <input type="hidden" name="hosted_button_id" value="74EYUMF3FTSW8">
--    <input type="image"
--    src="https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif"
--    border="0" name="submit" alt="PayPal - The safer, easier way to pay
--    online!" style="position: relative;
--    left: 40%;">
--    <img alt="" border="0"
--    src="https://www.paypalobjects.com/en_US/i/scr/pixel.gif" width="1"
--    height="1">
--    </form>
--    </br>
--
- .. rubric:: Notes
- 
- .. [#f1] Regarding the organization budget, in particular, we might use some of
diff -pruN 0.23.2-5/debian/patches/series 1.1.1-1/debian/patches/series
--- 0.23.2-5/debian/patches/series	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/series	2022-05-23 15:38:12.000000000 +0000
@@ -1,17 +1,9 @@
+relax_test_tolerance.patch
+Disable-BinderHub-links.patch
+parallel-cythonize.patch
+no-download-intersphinx-mapping.patch
 remove-google-analytics.patch
 git-revision-cmd.patch
 remove-paypal.patch
-no-download-intersphinx-mapping.patch
-disable-doctest.patch
-parallel-cythonize.patch
 no-buttons-js.patch
 Use-local-MathJax.patch
-Disable-BinderHub-links.patch
-Skip-two-tests-when-SciPy-1.5.patch
-Skip-k-means-convergence-tests.patch
-NumPy-div-by-zero-check.patch
-Use-assert_allclose-instead-of-equality-in-FP-comparison.patch
-Change-expected-result-type-to-pass-on-32-bit.patch
-Increase-tolerance-for-RANSACRegressor-test.patch
-fix_scipy1.6_PR18711.patch
-skip_test_ELKI_PR19115.patch
diff -pruN 0.23.2-5/debian/patches/Skip-k-means-convergence-tests.patch 1.1.1-1/debian/patches/Skip-k-means-convergence-tests.patch
--- 0.23.2-5/debian/patches/Skip-k-means-convergence-tests.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/Skip-k-means-convergence-tests.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,21 +0,0 @@
-From: Christian Kastner <ckk@debian.org>
-Date: Mon, 20 Jul 2020 18:20:41 +0200
-Subject: Skip k-means convergence tests
-
-Until GH 17428 is resolved.
----
- sklearn/cluster/tests/test_k_means.py | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py
-index b53fd5b..6183f44 100644
---- a/sklearn/cluster/tests/test_k_means.py
-+++ b/sklearn/cluster/tests/test_k_means.py
-@@ -163,6 +163,7 @@ def test_kmeans_elkan_results(distribution, array_constr, tol):
-     assert km_elkan.inertia_ == pytest.approx(km_full.inertia_, rel=1e-6)
- 
- 
-+@pytest.mark.skip(reason='GH 17428')
- @pytest.mark.parametrize('algorithm', ['full', 'elkan'])
- def test_kmeans_convergence(algorithm):
-     # Check that KMeans stops when convergence is reached when tol=0. (#16075)
diff -pruN 0.23.2-5/debian/patches/skip_test_ELKI_PR19115.patch 1.1.1-1/debian/patches/skip_test_ELKI_PR19115.patch
--- 0.23.2-5/debian/patches/skip_test_ELKI_PR19115.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/skip_test_ELKI_PR19115.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,82 +0,0 @@
-From 0387afba0758b833ca6a30af7fde75f82f4ae96a Mon Sep 17 00:00:00 2001
-From: "Thomas J. Fan" <thomasjpfan@gmail.com>
-Date: Tue, 5 Jan 2021 10:25:22 -0500
-Subject: [PATCH 1/2] TST Skips test for arm [cd build]
-
----
- sklearn/cluster/tests/test_optics.py | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py
-index 03ca4995c044..1f9b872752fc 100644
---- a/sklearn/cluster/tests/test_optics.py
-+++ b/sklearn/cluster/tests/test_optics.py
-@@ -1,6 +1,7 @@
- # Authors: Shane Grigsby <refuge@rocktalus.com>
- #          Adrin Jalali <adrin.jalali@gmail.com>
- # License: BSD 3 clause
-+import platform
- 
- import numpy as np
- import pytest
-@@ -15,6 +16,7 @@
- from sklearn.utils._testing import assert_array_equal
- from sklearn.utils._testing import assert_raise_message
- from sklearn.utils._testing import assert_allclose
-+from sklearn.utils.fixes import sp_version, parse_version
- 
- from sklearn.cluster.tests.common import generate_clustered_data
- 
-@@ -314,6 +316,9 @@ def test_processing_order():
-     assert_array_equal(clust.ordering_, [0, 1, 2, 3])
- 
- 
-+@pytest.mark.skipif(sp_version >= parse_version("1.6.0")
-+                    and platform.machine() == 'aarch64',
-+                    reason="Test fails for SciPy 1.6.0 on ARM. See #19111")
- def test_compare_to_ELKI():
-     # Expected values, computed with (future) ELKI 0.7.5 using:
-     # java -jar elki.jar cli -dbc.in csv -dbc.filter FixedDBIDsFilter
-
-From 0ebc78ad0b96b01de47ee8c39556c761b805cb50 Mon Sep 17 00:00:00 2001
-From: "Thomas J. Fan" <thomasjpfan@gmail.com>
-Date: Tue, 5 Jan 2021 15:59:19 -0500
-Subject: [PATCH 2/2] CI Skip for 32bit linux [cd build]
-
----
- sklearn/cluster/tests/test_optics.py | 8 ++++++--
- 1 file changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py
-index 1f9b872752fc..4428b6c00d7e 100644
---- a/sklearn/cluster/tests/test_optics.py
-+++ b/sklearn/cluster/tests/test_optics.py
-@@ -2,6 +2,7 @@
- #          Adrin Jalali <adrin.jalali@gmail.com>
- # License: BSD 3 clause
- import platform
-+import sys
- 
- import numpy as np
- import pytest
-@@ -19,6 +20,7 @@
- from sklearn.utils.fixes import sp_version, parse_version
- 
- from sklearn.cluster.tests.common import generate_clustered_data
-+from sklearn.utils import _IS_32BIT
- 
- 
- rng = np.random.RandomState(0)
-@@ -317,8 +319,10 @@ def test_processing_order():
- 
- 
- @pytest.mark.skipif(sp_version >= parse_version("1.6.0")
--                    and platform.machine() == 'aarch64',
--                    reason="Test fails for SciPy 1.6.0 on ARM. See #19111")
-+                    and (platform.machine() == "aarch64" or
-+                         (sys.platform == "linux" and _IS_32BIT)),
-+                    reason=("Test fails for SciPy 1.6.0 on ARM and on 32-bit "
-+                            "linux. See #19111"))
- def test_compare_to_ELKI():
-     # Expected values, computed with (future) ELKI 0.7.5 using:
-     # java -jar elki.jar cli -dbc.in csv -dbc.filter FixedDBIDsFilter
diff -pruN 0.23.2-5/debian/patches/Skip-two-tests-when-SciPy-1.5.patch 1.1.1-1/debian/patches/Skip-two-tests-when-SciPy-1.5.patch
--- 0.23.2-5/debian/patches/Skip-two-tests-when-SciPy-1.5.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/Skip-two-tests-when-SciPy-1.5.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,43 +0,0 @@
-From: Christian Kastner <ckk@debian.org>
-Date: Mon, 20 Jul 2020 13:14:32 +0200
-Subject: Skip two tests when SciPy < 1.5
-
-Forwarded: not-needed
----
- sklearn/ensemble/tests/test_voting.py       | 5 +++++
- sklearn/linear_model/tests/test_logistic.py | 6 +++++-
- 2 files changed, 10 insertions(+), 1 deletion(-)
-
-diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py
-index f81b9e5..fe93e63 100644
---- a/sklearn/ensemble/tests/test_voting.py
-+++ b/sklearn/ensemble/tests/test_voting.py
-@@ -359,6 +359,11 @@ def test_voting_classifier_set_params():
- 
- 
- # TODO: Remove parametrization in 0.24 when None is removed in Voting*
-+# Debian: This test fails with scipy < 1.5 because of
-+#         https://github.com/scipy/scipy/pull/11755
-+import scipy
-+svers = scipy.__version__.split('.')
-+@pytest.mark.skipif(int(svers[0]) <= 1 and int(svers[1]) < 5, reason='outdated SciPy')
- @pytest.mark.parametrize("drop", [None, 'drop'])
- def test_set_estimator_none(drop):
-     """VotingClassifier set_params should be able to set estimators as None or
-diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py
-index 9687f0b..2b7d5ac 100644
---- a/sklearn/linear_model/tests/test_logistic.py
-+++ b/sklearn/linear_model/tests/test_logistic.py
-@@ -382,7 +382,11 @@ def test_consistency_path():
-         assert_array_almost_equal(lr_coef, coefs[0], decimal=4,
-                                   err_msg="with solver = %s" % solver)
- 
--
-+# Debian: This test fails with scipy < 1.5 because of
-+#         https://github.com/scipy/scipy/pull/11755
-+import scipy
-+svers = scipy.__version__.split('.')
-+@pytest.mark.skipif(int(svers[0]) <= 1 and int(svers[1]) < 5, reason='outdated SciPy')
- def test_logistic_regression_path_convergence_fail():
-     rng = np.random.RandomState(0)
-     X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2)))
diff -pruN 0.23.2-5/debian/patches/Use-assert_allclose-instead-of-equality-in-FP-comparison.patch 1.1.1-1/debian/patches/Use-assert_allclose-instead-of-equality-in-FP-comparison.patch
--- 0.23.2-5/debian/patches/Use-assert_allclose-instead-of-equality-in-FP-comparison.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/Use-assert_allclose-instead-of-equality-in-FP-comparison.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,24 +0,0 @@
-From: Christian Kastner <ckk@kvr.at>
-Date: Sat, 1 Aug 2020 08:00:37 +0200
-Subject: Use assert_allclose instead of equality in FP comparison
-
-Origin: https://github.com/scikit-learn/scikit-learn/pull/18053
-Bug: https://github.com/scikit-learn/scikit-learn/issues/18031
----
- sklearn/decomposition/tests/test_pca.py | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
-index 0123e16..3548c91 100644
---- a/sklearn/decomposition/tests/test_pca.py
-+++ b/sklearn/decomposition/tests/test_pca.py
-@@ -633,7 +633,8 @@ def test_assess_dimesion_rank_one():
-     n_samples, n_features = 9, 6
-     X = np.ones((n_samples, n_features))  # rank 1 matrix
-     _, s, _ = np.linalg.svd(X, full_matrices=True)
--    assert sum(s[1:]) == 0  # except for rank 1, all eigenvalues are 0
-+    # except for rank 1, all eigenvalues are 0 resp. close to 0 (FP)
-+    assert_allclose(s[1:], np.zeros(n_features-1), atol=1e-12)
- 
-     assert np.isfinite(_assess_dimension(s, rank=1, n_samples=n_samples))
-     for rank in range(2, n_features):
diff -pruN 0.23.2-5/debian/patches/Use-local-MathJax.patch 1.1.1-1/debian/patches/Use-local-MathJax.patch
--- 0.23.2-5/debian/patches/Use-local-MathJax.patch	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/patches/Use-local-MathJax.patch	2022-05-23 15:38:12.000000000 +0000
@@ -7,17 +7,14 @@ Forwarded: not-needed
  doc/conf.py | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)
 
-diff --git a/doc/conf.py b/doc/conf.py
-index 6a3e8a8..701fcee 100644
 --- a/doc/conf.py
 +++ b/doc/conf.py
-@@ -55,8 +55,7 @@ if os.environ.get('NO_MATHJAX'):
-     mathjax_path = ''
+@@ -75,7 +75,7 @@
+     mathjax_path = ""
  else:
-     extensions.append('sphinx.ext.mathjax')
--    mathjax_path = ('https://cdn.jsdelivr.net/npm/mathjax@3/es5/'
--                    'tex-chtml.js')
-+    mathjax_path = 'file:///usr/share/javascript/mathjax/MathJax.js?config=TeX-AMS_CHTML'
+     extensions.append("sphinx.ext.mathjax")
+-    mathjax_path = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"
++    mathjax_path = "file:///usr/share/javascript/mathjax/MathJax.js?config=TeX-AMS_CHTML"
+ 
+ autodoc_default_options = {"members": True, "inherited-members": True}
  
- autodoc_default_options = {
-     'members': True,
diff -pruN 0.23.2-5/debian/rules 1.1.1-1/debian/rules
--- 0.23.2-5/debian/rules	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/rules	2022-05-23 15:38:12.000000000 +0000
@@ -3,6 +3,9 @@
 
 # See also: skimage
 
+# As a last resort to advance the python3.10 transition, Paul Gevers disabled the tests during build in an NMU (see: #1003165)
+# export DEB_BUILD_OPTIONS += nocheck
+
 include /usr/share/dpkg/architecture.mk
 
 PY3VER  ?= $(shell py3versions -vd)
@@ -24,12 +27,11 @@ export JOBLIB_MULTIPROCESSING := $(shell
 # things in the doc building and tests try to access stuff in HOME (https://bugs.debian.org/915078)
 export HOME=$(CURDIR)/tmp
 
-
-# until the following are fixed:
-#  https://github.com/scikit-learn/scikit-learn/issues/16101
-#  https://github.com/scikit-learn/scikit-learn/issues/15420
-EXCLUDE_TESTS = not test_old_pickle
-EXCLUDE_TESTS += and not test_ard_accuracy_on_easy_problem
+# don't run network tests at build time
+EXCLUDE_TESTS = not test_load_boston_alternative
+# Until https://github.com/scikit-learn/scikit-learn/issues/17798
+# is solved
+EXCLUDE_TESTS += and not sklearn.ensemble._weight_boosting.AdaBoostRegressor
 
 # Some tests are known to fail randomly so need to be excluded ATM
 ifeq ($(DEB_HOST_ARCH),arm64)
@@ -37,17 +39,6 @@ ifeq ($(DEB_HOST_ARCH),arm64)
   EXCLUDE_TESTS += and not test_dump
 endif
 
-# Until this is fixed:
-# https://github.com/scikit-learn/scikit-learn/issues/16794
-ifeq ($(DEB_HOST_ARCH),ppc64el)
-  EXCLUDE_TESTS += and not test_precomputed_nearest_neighbors_filtering
-  # This is actually a test generator, so this skips a *lot* of tests
-  EXCLUDE_TESTS += and not test_common
-  # These are is flaky
-  EXCLUDE_TESTS += and not test_stacking_cv_influence
-  EXCLUDE_TESTS += and not test_stacking_with_sample_weight
-endif
-
 # Until these are fixed:
 # https://github.com/scikit-learn/scikit-learn/issues/16443
 # https://github.com/scikit-learn/scikit-learn/issues/13052
@@ -71,10 +62,15 @@ ifeq ($(DEB_HOST_ARCH),alpha)
   EXCLUDE_TESTS += and not test_gaussian_kde
 endif
 # Same test, but not entirely the same results as above. Assuming same cause
-ifeq ($(DEB_HOST_ARCH),ppc64)
+ifeq ($(DEB_HOST_ARCH),i386)
   EXCLUDE_TESTS += and not test_gaussian_kde
+  EXCLUDE_TESTS += and not test_y_multioutput
 endif
 
+# Until https://github.com/scikit-learn/scikit-learn/issues/19230 is fixed
+ifeq ($(DEB_HOST_ARCH),i386)
+  EXCLUDE_TESTS += and not test_convergence_dtype_consistency
+endif
 
 %:
 	dh $@ --with python3,sphinxdoc --buildsystem pybuild
@@ -136,7 +132,7 @@ override_dh_auto_build-arch:
 
 
 export PYBUILD_TEST_PYTEST := 1
-export PYBUILD_TEST_ARGS_python3 := -m "not network" -v -k "$(EXCLUDE_TESTS)"
+export PYBUILD_TEST_ARGS_python3 := -m "not network" -v --color=no -k "$(EXCLUDE_TESTS)"
 define PYBUILD_BEFORE_TEST
   (mv $(CURDIR)/sklearn/conftest.py $(CURDIR)/sklearn/conftest.py.test; \
    mv $(CURDIR)/sklearn/datasets/tests/conftest.py $(CURDIR)/sklearn/datasets/tests/conftest.py.test; \
diff -pruN 0.23.2-5/debian/salsa-ci.yml 1.1.1-1/debian/salsa-ci.yml
--- 0.23.2-5/debian/salsa-ci.yml	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/debian/salsa-ci.yml	2022-05-23 15:38:12.000000000 +0000
@@ -0,0 +1,6 @@
+include:
+ - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml
+ - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml
+
+variables:
+  RELEASE: 'unstable'
diff -pruN 0.23.2-5/debian/source/lintian-overrides 1.1.1-1/debian/source/lintian-overrides
--- 0.23.2-5/debian/source/lintian-overrides	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/source/lintian-overrides	2022-05-23 15:38:12.000000000 +0000
@@ -1,13 +1,13 @@
 # It is the source
-scikit-learn source: source-is-missing doc/themes/scikit-learn/static/jquery.maphilight.js *
+scikit-learn source: source-is-missing doc/themes/scikit-learn-modern/static/jquery.maphilight.js *
 # The following files are not used, they are replaced with links to the system-wide ones
 scikit-learn source: source-is-missing doc/themes/scikit-learn-modern/static/js/vendor/bootstrap.min.js
 scikit-learn source: source-contains-prebuilt-javascript-object doc/themes/scikit-learn-modern/static/js/vendor/bootstrap.min.js
 # The following files are not used/shipped (they are from an obsolete theme)
-scikit-learn source: source-is-missing doc/themes/scikit-learn/static/jquery.js line length is 32030 characters (>512)
-scikit-learn source: source-contains-prebuilt-javascript-object doc/themes/scikit-learn/static/jquery.js line length is 32030 characters (>512)
-scikit-learn source: source-contains-prebuilt-javascript-object doc/themes/scikit-learn/static/jquery.maphilight.js line length is 337 characters (>256)
-scikit-learn source: source-contains-prebuilt-javascript-object doc/themes/scikit-learn/static/jquery.maphilight.min.js
-scikit-learn source: source-contains-prebuilt-javascript-object doc/themes/scikit-learn/static/js/bootstrap.min.js
-scikit-learn source: very-long-line-length-in-source-file doc/themes/scikit-learn/static/jquery.js line length is 32030 characters (>512)
+scikit-learn source: source-is-missing doc/themes/scikit-learn-modern/static/jquery.js line length is *
+scikit-learn source: source-contains-prebuilt-javascript-object doc/themes/scikit-learn-modern/static/jquery.js line length is *
+scikit-learn source: source-contains-prebuilt-javascript-object doc/themes/scikit-learn-modern/static/jquery.maphilight.js line length is *
+scikit-learn source: source-contains-prebuilt-javascript-object doc/themes/scikit-learn-modern/static/jquery.maphilight.min.js
+scikit-learn source: source-contains-prebuilt-javascript-object doc/themes/scikit-learn-modern/static/js/bootstrap.min.js
+scikit-learn source: very-long-line-length-in-source-file doc/themes/scikit-learn-modern/static/jquery.js line length is *
 
diff -pruN 0.23.2-5/debian/tests/control 1.1.1-1/debian/tests/control
--- 0.23.2-5/debian/tests/control	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/tests/control	2022-05-23 15:38:12.000000000 +0000
@@ -1,3 +1,11 @@
 Tests: python3
-Depends: python3-all, python3-nose, python3-sklearn, python3-numpy, python3-scipy, python3-nose, python3-pytest, python3-matplotlib, python3-joblib (>= 0.9.2),
+Depends: python3-all,
+ python3-nose,
+ python3-sklearn,
+ python3-joblib (>= 0.9.2),
+ python3-matplotlib,
+ python3-numpy,
+ python3-pytest,
+ python3-scipy,
+ python3-setuptools
 Restrictions: allow-stderr
diff -pruN 0.23.2-5/debian/tests/python3 1.1.1-1/debian/tests/python3
--- 0.23.2-5/debian/tests/python3	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/tests/python3	2022-05-23 15:38:12.000000000 +0000
@@ -41,5 +41,5 @@ for py in $pys; do
         cd "$AUTOPKGTEST_TMP"
     fi
 
-    python$py -m pytest -m 'not network' -s -v $module -k "$exclude_tests" 2>&1
+    python$py -m pytest -m 'not network' -s -v --color=no $module -k "$exclude_tests" 2>&1
 done
diff -pruN 0.23.2-5/debian/TODO 1.1.1-1/debian/TODO
--- 0.23.2-5/debian/TODO	2021-01-19 11:20:59.000000000 +0000
+++ 1.1.1-1/debian/TODO	1970-01-01 00:00:00.000000000 +0000
@@ -1,21 +0,0 @@
-# emacs: -*- mode: org; indent-tabs-mode: nil -*-
-
-* Modularization
-** Use now-available system-wide liblinear?
-** Use system-wide atlas?
-
-ATM Debian carries stable upstream release 3.8.3, while scikits.learn
-uses 3.9.25 (current development upstream release is 3.9.32,
-see #609287)
-
-* provide .doc-base file
-* provide -dbg package
-
-
-* lintian E/W:
-
-neurodebian@head2:~/deb/builds/scikit-learn/0.16.1-2$ lintian scikit-learn_0.16.1-2_amd64.changes
-W: scikit-learn source: invalid-short-name-in-dep5-copyright bsd (paragraph at line 10)
-W: python-sklearn: image-file-in-usr-lib usr/lib/python2.7/dist-packages/sklearn/datasets/images/china.jpg
-W: python-sklearn: image-file-in-usr-lib usr/lib/python2.7/dist-packages/sklearn/datasets/images/flower.jpg
-
diff -pruN 0.23.2-5/debian/upstream/metadata 1.1.1-1/debian/upstream/metadata
--- 0.23.2-5/debian/upstream/metadata	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/debian/upstream/metadata	2022-05-23 15:38:12.000000000 +0000
@@ -0,0 +1,15 @@
+---
+Bug-Database: https://github.com/scikit-learn/scikit-learn/issues
+Bug-Submit: https://github.com/scikit-learn/scikit-learn/issues/new
+Repository: https://github.com/scikit-learn/scikit-learn.git
+Repository-Browse: https://github.com/scikit-learn/scikit-learn
+Registry:
+ - Name: conda:conda-forge
+   Entry: scikit-learn
+ - Name: bio.tools
+   Entry: NA
+   Checked: 2021-09-06
+ - Name: guix:machine-learning
+   Entry: python-scikit-learn
+ - Name: SciCrunch
+   Entry: SCR_002577
diff -pruN 0.23.2-5/doc/about.rst 1.1.1-1/doc/about.rst
--- 0.23.2-5/doc/about.rst	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/about.rst	2022-05-19 12:16:26.432781500 +0000
@@ -18,6 +18,7 @@ been leading the development.
 
 Governance
 ----------
+
 The decision making process and governance structure of scikit-learn is laid
 out in the :ref:`governance document <governance>`.
 
@@ -38,13 +39,40 @@ in the FAQ.
 
    :ref:`How you can contribute to the project <contributing>`
 
+Contributor Experience Team
+---------------------------
+
+The following people are active contributors who also help with
+:ref:`triaging issues <bug_triaging>`, PRs, and general
+maintenance:
+
+.. include:: contributor_experience_team.rst
+
+Communication Team
+------------------
+
+The following people help with :ref:`communication around scikit-learn
+<communication_team>`.
+
+.. include:: communication_team.rst
+
+
 Emeritus Core Developers
 ------------------------
+
 The following people have been active contributors in the past, but are no
 longer active in the project:
 
 .. include:: authors_emeritus.rst
 
+Emeritus Communication Team
+---------------------------
+
+The following people have been active in the communication team in the
+past, but no longer have communication responsibilities:
+
+.. include:: communication_team_emeritus.rst
+
 
 .. _citing-scikit-learn:
 
@@ -75,8 +103,8 @@ citations to the following paper:
 If you want to cite scikit-learn for its API or design, you may also want to consider the
 following paper:
 
-  `API design for machine learning software: experiences from the scikit-learn
-  project <https://arxiv.org/abs/1309.0238>`_, Buitinck *et al.*, 2013.
+  :arxiv:`API design for machine learning software: experiences from the scikit-learn
+  project <1309.0238>`, Buitinck *et al.*, 2013.
 
   Bibtex entry::
 
@@ -97,7 +125,7 @@ Artwork
 -------
 
 High quality PNG and SVG logos are available in the `doc/logos/
-<https://github.com/scikit-learn/scikit-learn/tree/master/doc/logos>`_
+<https://github.com/scikit-learn/scikit-learn/tree/main/doc/logos>`_
 source directory.
 
 .. image:: images/scikit-learn-logo-notext.png
@@ -108,7 +136,7 @@ Funding
 Scikit-Learn is a community driven project, however institutional and private
 grants help to assure its sustainability.
 
-The project would like to thank the following funders. 
+The project would like to thank the following funders.
 
 ...................................
 
@@ -120,7 +148,7 @@ The project would like to thank the foll
 The `Members <https://scikit-learn.fondation-inria.fr/en/home/#sponsors>`_ of
 the `Scikit-Learn Consortium at Inria Foundation
 <https://scikit-learn.fondation-inria.fr/en/home/>`_  fund Olivier
-Grisel, Guillaume Lemaitre, Jérémie du Boisberranger and Chiara Marmo.
+Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger.
 
 .. raw:: html
 
@@ -146,18 +174,14 @@ Grisel, Guillaume Lemaitre, Jérémie du
    :width: 100pt
    :target: https://www.fujitsu.com/global/
 
-.. |intel| image:: images/intel.png
-   :width: 70pt
-   :target: https://www.intel.com/
-
-.. |nvidia| image:: images/nvidia.png
-   :width: 70pt
-   :target: https://www.nvidia.com/
-
 .. |dataiku| image:: images/dataiku.png
    :width: 70pt
    :target: https://www.dataiku.com/
 
+.. |aphp| image:: images/logo_APHP_text.png
+   :width: 150pt
+   :target: https://aphp.fr/
+
 .. |inria| image:: images/inria-logo.jpg
    :width: 100pt
    :target: https://www.inria.fr
@@ -171,17 +195,19 @@ Grisel, Guillaume Lemaitre, Jérémie du
    :class: sk-sponsor-table align-default
 
    +---------+----------+
-   | |msn|   |  |bcg|   |
+   |       |bcg|        |
    +---------+----------+
    |                    |
-   +---------+----------+ 
+   +---------+----------+
    |  |axa|  |   |bnp|  |
    +---------+----------+
-   ||fujitsu||  |intel| |
+   ||fujitsu||  |msn|   |
    +---------+----------+
    |                    |
    +---------+----------+
-   ||dataiku|| |nvidia| |
+   |     |dataiku|      |
+   +---------+----------+
+   |       |aphp|       |
    +---------+----------+
    |                    |
    +---------+----------+
@@ -193,14 +219,14 @@ Grisel, Guillaume Lemaitre, Jérémie du
    </div>
    </div>
 
-........  
+..........
 
 .. raw:: html
 
    <div class="sk-sponsor-div">
    <div class="sk-sponsor-div-box">
 
-`Columbia University <https://columbia.edu/>`_ funds Andreas Müller since 2016
+`Hugging Face <https://huggingface.co/>`_ funds Adrin Jalali since 2022.
 
 .. raw:: html
 
@@ -208,26 +234,24 @@ Grisel, Guillaume Lemaitre, Jérémie du
 
    <div class="sk-sponsor-div-box">
 
-.. image:: themes/scikit-learn/static/img/columbia.png 
-   :width: 50pt
+.. image:: images/huggingface_logo-noborder.png
+   :width: 55pt
    :align: center
-   :target: https://www.columbia.edu/
+   :target: https://huggingface.co/
 
 .. raw:: html
 
    </div>
    </div>
 
-..........
+...........
 
 .. raw:: html
 
-   <div class="sk-sponsor-div">   
+   <div class="sk-sponsor-div">
    <div class="sk-sponsor-div-box">
 
-Andreas Müller received a grant to improve scikit-learn from the
-`Alfred P. Sloan Foundation <https://sloan.org>`_ .
-This grant supports the position of Nicolas Hug and Thomas J. Fan.
+`Microsoft <https://microsoft.com/>`_ funds Andreas Müller since 2020.
 
 .. raw:: html
 
@@ -235,10 +259,10 @@ This grant supports the position of Nico
 
    <div class="sk-sponsor-div-box">
 
-.. image:: images/sloan_banner.png
+.. image:: images/microsoft.png
    :width: 100pt
    :align: center
-   :target: https://sloan.org/
+   :target: https://www.microsoft.com/
 
 .. raw:: html
 
@@ -252,8 +276,7 @@ This grant supports the position of Nico
    <div class="sk-sponsor-div">
    <div class="sk-sponsor-div-box">
 
-`The University of Sydney <https://sydney.edu.au/>`_ funds Joel Nothman since
-July 2017.
+`Quansight Labs <https://labs.quansight.org>`_ funds Thomas J. Fan since 2021.
 
 .. raw:: html
 
@@ -261,10 +284,10 @@ July 2017.
 
    <div class="sk-sponsor-div-box">
 
-.. image:: themes/scikit-learn/static/img/sydney-primary.jpeg
+.. image:: images/quansight-labs.png
    :width: 100pt
    :align: center
-   :target: https://sydney.edu.au/
+   :target: https://labs.quansight.org
 
 .. raw:: html
 
@@ -279,10 +302,8 @@ Past Sponsors
    <div class="sk-sponsor-div">
    <div class="sk-sponsor-div-box">
 
-`INRIA <https://www.inria.fr>`_ actively supports this project. It has
-provided funding for Fabian Pedregosa (2010-2012), Jaques Grobler
-(2012-2013) and Olivier Grisel (2013-2017) to work on this project
-full-time. It also hosts coding sprints and other events.
+`Columbia University <https://columbia.edu/>`_ funded Andreas Müller
+(2016-2020).
 
 .. raw:: html
 
@@ -290,52 +311,80 @@ full-time. It also hosts coding sprints
 
    <div class="sk-sponsor-div-box">
 
-.. image:: images/inria-logo.jpg
+.. image:: images/columbia.png
+   :width: 50pt
+   :align: center
+   :target: https://www.columbia.edu/
+
+.. raw:: html
+
+   </div>
+   </div>
+
+........
+
+.. raw:: html
+
+   <div class="sk-sponsor-div">
+   <div class="sk-sponsor-div-box">
+
+`The University of Sydney <https://sydney.edu.au/>`_ funded Joel Nothman
+(2017-2021).
+
+.. raw:: html
+
+   </div>
+
+   <div class="sk-sponsor-div-box">
+
+.. image:: images/sydney-primary.jpeg
    :width: 100pt
    :align: center
-   :target: https://www.inria.fr
+   :target: https://sydney.edu.au/
 
 .. raw:: html
 
    </div>
    </div>
 
-.....................
+...........
 
 .. raw:: html
 
    <div class="sk-sponsor-div">
    <div class="sk-sponsor-div-box">
 
-`Paris-Saclay Center for Data Science
-<https://www.datascience-paris-saclay.fr/>`_
-funded one year for a developer to work on the project full-time
-(2014-2015), 50% of the time of Guillaume Lemaitre (2016-2017) and 50% of the
-time of Joris van den Bossche (2017-2018).
+Andreas Müller received a grant to improve scikit-learn from the
+`Alfred P. Sloan Foundation <https://sloan.org>`_ .
+This grant supported the position of Nicolas Hug and Thomas J. Fan.
 
 .. raw:: html
 
    </div>
+
    <div class="sk-sponsor-div-box">
 
-.. image:: images/cds-logo.png
+.. image:: images/sloan_banner.png
    :width: 100pt
    :align: center
-   :target: https://www.datascience-paris-saclay.fr/
+   :target: https://sloan.org/
 
 .. raw:: html
 
    </div>
    </div>
 
-............
+.............
 
 .. raw:: html
 
    <div class="sk-sponsor-div">
    <div class="sk-sponsor-div-box">
 
-`Anaconda, Inc <https://www.anaconda.com/>`_ funded Adrin Jalali in 2019.
+`INRIA <https://www.inria.fr>`_ actively supports this project. It has
+provided funding for Fabian Pedregosa (2010-2012), Jaques Grobler
+(2012-2013) and Olivier Grisel (2013-2017) to work on this project
+full-time. It also hosts coding sprints and other events.
 
 .. raw:: html
 
@@ -343,10 +392,38 @@ time of Joris van den Bossche (2017-2018
 
    <div class="sk-sponsor-div-box">
 
-.. image:: images/anaconda.png
+.. image:: images/inria-logo.jpg
+   :width: 100pt
+   :align: center
+   :target: https://www.inria.fr
+
+.. raw:: html
+
+   </div>
+   </div>
+
+.....................
+
+.. raw:: html
+
+   <div class="sk-sponsor-div">
+   <div class="sk-sponsor-div-box">
+
+`Paris-Saclay Center for Data Science
+<https://www.datascience-paris-saclay.fr/>`_
+funded one year for a developer to work on the project full-time
+(2014-2015), 50% of the time of Guillaume Lemaitre (2016-2017) and 50% of the
+time of Joris van den Bossche (2017-2018).
+
+.. raw:: html
+
+   </div>
+   <div class="sk-sponsor-div-box">
+
+.. image:: images/cds-logo.png
    :width: 100pt
    :align: center
-   :target: https://www.anaconda.com/
+   :target: https://www.datascience-paris-saclay.fr/
 
 .. raw:: html
 
@@ -396,7 +473,7 @@ part-time.
    </div>
    <div class="sk-sponsor-div-box">
 
-.. image:: themes/scikit-learn/static/img/telecom.png
+.. image:: images/telecom.png
    :width: 50pt
    :align: center
    :target: https://www.telecom-paristech.fr/
@@ -423,7 +500,7 @@ funded a scikit-learn coding sprint in 2
    </div>
    <div class="sk-sponsor-div-box">
 
-.. image:: themes/scikit-learn/static/img/digicosme.png
+.. image:: images/digicosme.png
    :width: 100pt
    :align: center
    :target: https://digicosme.lri.fr
@@ -433,6 +510,31 @@ funded a scikit-learn coding sprint in 2
    </div>
    </div>
 
+.....................
+
+.. raw:: html
+
+   <div class="sk-sponsor-div">
+   <div class="sk-sponsor-div-box">
+
+`The Chan-Zuckerberg Initiative <https://chanzuckerberg.com/>`_ funded Nicolas
+Hug to work full-time on scikit-learn in 2020.
+
+.. raw:: html
+
+   </div>
+   <div class="sk-sponsor-div-box">
+
+.. image:: images/czi_logo.svg
+   :width: 100pt
+   :align: center
+   :target: https://chanzuckerberg.com
+
+.. raw:: html
+
+   </div>
+   </div>
+
 ......................
 
 The following students were sponsored by `Google
@@ -446,7 +548,7 @@ program.
 - 2013 - Kemal Eren, Nicolas Trésegnie
 - 2014 - Hamzeh Alsalhi, Issam Laradji, Maheshakya Wijewardena, Manoj Kumar.
 - 2015 - `Raghav RV <https://github.com/raghavrv>`_, Wei Xue
-- 2016 - `Nelson Liu <http://nelsonliu.me>`_, `YenChen Lin <https://yclin.me/>`_
+- 2016 - `Nelson Liu <http://nelsonliu.me>`_, `YenChen Lin <https://yenchenlin.me/>`_
 
 .. _Vlad Niculae: https://vene.ro/
 
@@ -502,21 +604,11 @@ budget of the project [#f1]_.
 
 .. raw :: html
 
-    </br></br>
-    <form action="https://www.paypal.com/cgi-bin/webscr" method="post"
-    target="_top">
-    <input type="hidden" name="cmd" value="_s-xclick">
-    <input type="hidden" name="hosted_button_id" value="74EYUMF3FTSW8">
-    <input type="image"
-    src="https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif"
-    border="0" name="submit" alt="PayPal - The safer, easier way to pay
-    online!" style="position: relative;
-    left: 40%;">
-    <img alt="" border="0"
-    src="https://www.paypalobjects.com/en_US/i/scr/pixel.gif" width="1"
-    height="1">
-    </form>
-    </br>
+   </br></br>
+   <div style="text-align: center;">
+   <a class="btn btn-warning btn-big sk-donate-btn mb-1" href="https://numfocus.org/donate-to-scikit-learn">Help us, <strong>donate!</strong></a>
+   </div>
+   </br>
 
 .. rubric:: Notes
 
@@ -527,13 +619,8 @@ budget of the project [#f1]_.
 Infrastructure support
 ----------------------
 
-- We would like to thank `Rackspace <https://www.rackspace.com>`_ for providing
-  us with a free `Rackspace Cloud <https://www.rackspace.com/cloud/>`_ account
-  to automatically build the documentation and the example gallery from for the
-  development version of scikit-learn using `this tool
-  <https://github.com/scikit-learn/sklearn-docbuilder>`_.
-
 - We would also like to thank `Microsoft Azure
   <https://azure.microsoft.com/en-us/>`_, `Travis CI <https://travis-ci.org/>`_,
   `CircleCI <https://circleci.com/>`_ for free CPU time on their Continuous
-  Integration servers.
+  Integration servers, and `Anaconda Inc. <https://www.anaconda.com>`_ for the
+  storage they provide for our staging and nightly builds.
diff -pruN 0.23.2-5/doc/authors_emeritus.rst 1.1.1-1/doc/authors_emeritus.rst
--- 0.23.2-5/doc/authors_emeritus.rst	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/authors_emeritus.rst	2022-05-19 12:16:26.432781500 +0000
@@ -3,7 +3,6 @@
 - Lars Buitinck
 - David Cournapeau
 - Noel Dawe
-- Shiqiao Du
 - Vincent Dubourg
 - Edouard Duchesnay
 - Alexander Fabisch
@@ -28,6 +27,7 @@
 - Peter Prettenhofer
 - (Venkat) Raghav, Rajagopalan
 - Jacob Schreiber
+- Du Shiqiao
 - Jake Vanderplas
 - David Warde-Farley
 - Ron Weiss
\ No newline at end of file
diff -pruN 0.23.2-5/doc/authors.rst 1.1.1-1/doc/authors.rst
--- 0.23.2-5/doc/authors.rst	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/authors.rst	2022-05-19 12:16:26.432781500 +0000
@@ -6,83 +6,91 @@
       img.avatar {border-radius: 10px;}
     </style>
     <div>
-    <a href='https://github.com/jeremiedbb'><img src='https://avatars2.githubusercontent.com/u/34657725?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/jeremiedbb'><img src='https://avatars.githubusercontent.com/u/34657725?v=4' class='avatar' /></a> <br />
     <p>Jérémie du Boisberranger</p>
     </div>
     <div>
-    <a href='https://github.com/jorisvandenbossche'><img src='https://avatars2.githubusercontent.com/u/1020496?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/jorisvandenbossche'><img src='https://avatars.githubusercontent.com/u/1020496?v=4' class='avatar' /></a> <br />
     <p>Joris Van den Bossche</p>
     </div>
     <div>
-    <a href='https://github.com/lesteve'><img src='https://avatars1.githubusercontent.com/u/1680079?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/lesteve'><img src='https://avatars.githubusercontent.com/u/1680079?v=4' class='avatar' /></a> <br />
     <p>Loïc Estève</p>
     </div>
     <div>
-    <a href='https://github.com/thomasjpfan'><img src='https://avatars2.githubusercontent.com/u/5402633?v=4' class='avatar' /></a> <br />
-    <p>Thomas J Fan</p>
+    <a href='https://github.com/thomasjpfan'><img src='https://avatars.githubusercontent.com/u/5402633?v=4' class='avatar' /></a> <br />
+    <p>Thomas J. Fan</p>
     </div>
     <div>
-    <a href='https://github.com/agramfort'><img src='https://avatars2.githubusercontent.com/u/161052?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/agramfort'><img src='https://avatars.githubusercontent.com/u/161052?v=4' class='avatar' /></a> <br />
     <p>Alexandre Gramfort</p>
     </div>
     <div>
-    <a href='https://github.com/ogrisel'><img src='https://avatars0.githubusercontent.com/u/89061?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/ogrisel'><img src='https://avatars.githubusercontent.com/u/89061?v=4' class='avatar' /></a> <br />
     <p>Olivier Grisel</p>
     </div>
     <div>
-    <a href='https://github.com/yarikoptic'><img src='https://avatars3.githubusercontent.com/u/39889?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/yarikoptic'><img src='https://avatars.githubusercontent.com/u/39889?v=4' class='avatar' /></a> <br />
     <p>Yaroslav Halchenko</p>
     </div>
     <div>
-    <a href='https://github.com/NicolasHug'><img src='https://avatars2.githubusercontent.com/u/1190450?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/NicolasHug'><img src='https://avatars.githubusercontent.com/u/1190450?v=4' class='avatar' /></a> <br />
     <p>Nicolas Hug</p>
     </div>
     <div>
-    <a href='https://github.com/adrinjalali'><img src='https://avatars3.githubusercontent.com/u/1663864?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/adrinjalali'><img src='https://avatars.githubusercontent.com/u/1663864?v=4' class='avatar' /></a> <br />
     <p>Adrin Jalali</p>
     </div>
     <div>
-    <a href='https://github.com/glemaitre'><img src='https://avatars2.githubusercontent.com/u/7454015?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/jjerphan'><img src='https://avatars.githubusercontent.com/u/13029839?v=4' class='avatar' /></a> <br />
+    <p>Julien Jerphanion</p>
+    </div>
+    <div>
+    <a href='https://github.com/glemaitre'><img src='https://avatars.githubusercontent.com/u/7454015?v=4' class='avatar' /></a> <br />
     <p>Guillaume Lemaitre</p>
     </div>
     <div>
-    <a href='https://github.com/jmetzen'><img src='https://avatars1.githubusercontent.com/u/1116263?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/lorentzenchr'><img src='https://avatars.githubusercontent.com/u/15324633?v=4' class='avatar' /></a> <br />
+    <p>Christian Lorentzen</p>
+    </div>
+    <div>
+    <a href='https://github.com/jmetzen'><img src='https://avatars.githubusercontent.com/u/1116263?v=4' class='avatar' /></a> <br />
     <p>Jan Hendrik Metzen</p>
     </div>
     <div>
-    <a href='https://github.com/amueller'><img src='https://avatars3.githubusercontent.com/u/449558?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/amueller'><img src='https://avatars.githubusercontent.com/u/449558?v=4' class='avatar' /></a> <br />
     <p>Andreas Mueller</p>
     </div>
     <div>
-    <a href='https://github.com/vene'><img src='https://avatars0.githubusercontent.com/u/241745?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/vene'><img src='https://avatars.githubusercontent.com/u/241745?v=4' class='avatar' /></a> <br />
     <p>Vlad Niculae</p>
     </div>
     <div>
-    <a href='https://github.com/jnothman'><img src='https://avatars2.githubusercontent.com/u/78827?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/jnothman'><img src='https://avatars.githubusercontent.com/u/78827?v=4' class='avatar' /></a> <br />
     <p>Joel Nothman</p>
     </div>
     <div>
-    <a href='https://github.com/qinhanmin2014'><img src='https://avatars2.githubusercontent.com/u/12003569?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/qinhanmin2014'><img src='https://avatars.githubusercontent.com/u/12003569?v=4' class='avatar' /></a> <br />
     <p>Hanmin Qin</p>
     </div>
     <div>
-    <a href='https://github.com/bthirion'><img src='https://avatars1.githubusercontent.com/u/234454?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/bthirion'><img src='https://avatars.githubusercontent.com/u/234454?v=4' class='avatar' /></a> <br />
     <p>Bertrand Thirion</p>
     </div>
     <div>
-    <a href='https://github.com/TomDLT'><img src='https://avatars2.githubusercontent.com/u/11065596?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/TomDLT'><img src='https://avatars.githubusercontent.com/u/11065596?v=4' class='avatar' /></a> <br />
     <p>Tom Dupré la Tour</p>
     </div>
     <div>
-    <a href='https://github.com/GaelVaroquaux'><img src='https://avatars3.githubusercontent.com/u/208217?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/GaelVaroquaux'><img src='https://avatars.githubusercontent.com/u/208217?v=4' class='avatar' /></a> <br />
     <p>Gael Varoquaux</p>
     </div>
     <div>
-    <a href='https://github.com/NelleV'><img src='https://avatars0.githubusercontent.com/u/184798?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/NelleV'><img src='https://avatars.githubusercontent.com/u/184798?v=4' class='avatar' /></a> <br />
     <p>Nelle Varoquaux</p>
     </div>
     <div>
-    <a href='https://github.com/rth'><img src='https://avatars0.githubusercontent.com/u/630936?v=4' class='avatar' /></a> <br />
+    <a href='https://github.com/rth'><img src='https://avatars.githubusercontent.com/u/630936?v=4' class='avatar' /></a> <br />
     <p>Roman Yurchak</p>
     </div>
     </div>
\ No newline at end of file
diff -pruN 0.23.2-5/doc/binder/requirements.txt 1.1.1-1/doc/binder/requirements.txt
--- 0.23.2-5/doc/binder/requirements.txt	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/binder/requirements.txt	2022-05-19 12:16:26.432781500 +0000
@@ -1,5 +1,5 @@
-# A binder requirement file is required by sphinx-gallery. We don't really need
-# one since the binder requirement files live in the
-# scikit-learn/binder-examples repo and not in the scikit-learn.github.io repo
-# that comes from the scikit-learn doc build. This file can be removed if
-# 'dependencies' is made an optional key for binder in sphinx-gallery.
+# A binder requirement file is required by sphinx-gallery.
+# We don't really need one since our binder requirement file lives in the
+# .binder directory.
+# This file can be removed if 'dependencies' is made an optional key for
+# binder in sphinx-gallery.
diff -pruN 0.23.2-5/doc/common_pitfalls.rst 1.1.1-1/doc/common_pitfalls.rst
--- 0.23.2-5/doc/common_pitfalls.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/common_pitfalls.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,578 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+.. include:: includes/big_toc_css.rst
+
+.. _common_pitfalls:
+
+=========================================
+Common pitfalls and recommended practices
+=========================================
+
+The purpose of this chapter is to illustrate some common pitfalls and
+anti-patterns that occur when using scikit-learn. It provides
+examples of what **not** to do, along with a corresponding correct
+example.
+
+Inconsistent preprocessing
+==========================
+
+scikit-learn provides a library of :ref:`data-transforms`, which
+may clean (see :ref:`preprocessing`), reduce
+(see :ref:`data_reduction`), expand (see :ref:`kernel_approximation`)
+or generate (see :ref:`feature_extraction`) feature representations.
+If these data transforms are used when training a model, they also
+must be used on subsequent datasets, whether it's test data or
+data in a production system. Otherwise, the feature space will change,
+and the model will not be able to perform effectively.
+
+For the following example, let's create a synthetic dataset with a
+single feature::
+
+    >>> from sklearn.datasets import make_regression
+    >>> from sklearn.model_selection import train_test_split
+
+    >>> random_state = 42
+    >>> X, y = make_regression(random_state=random_state, n_features=1, noise=1)
+    >>> X_train, X_test, y_train, y_test = train_test_split(
+    ...     X, y, test_size=0.4, random_state=random_state)
+
+**Wrong**
+
+The train dataset is scaled, but not the test dataset, so model
+performance on the test dataset is worse than expected::
+
+    >>> from sklearn.metrics import mean_squared_error
+    >>> from sklearn.linear_model import LinearRegression
+    >>> from sklearn.preprocessing import StandardScaler
+
+    >>> scaler = StandardScaler()
+    >>> X_train_transformed = scaler.fit_transform(X_train)
+    >>> model = LinearRegression().fit(X_train_transformed, y_train)
+    >>> mean_squared_error(y_test, model.predict(X_test))
+    62.80...
+
+**Right**
+
+Instead of passing the non-transformed `X_test` to `predict`, we should
+transform the test data, the same way we transformed the training data::
+
+    >>> X_test_transformed = scaler.transform(X_test)
+    >>> mean_squared_error(y_test, model.predict(X_test_transformed))
+    0.90...
+
+Alternatively, we recommend using a :class:`Pipeline
+<sklearn.pipeline.Pipeline>`, which makes it easier to chain transformations
+with estimators, and reduces the possibility of forgetting a transformation::
+
+    >>> from sklearn.pipeline import make_pipeline
+
+    >>> model = make_pipeline(StandardScaler(), LinearRegression())
+    >>> model.fit(X_train, y_train)
+    Pipeline(steps=[('standardscaler', StandardScaler()),
+                    ('linearregression', LinearRegression())])
+    >>> mean_squared_error(y_test, model.predict(X_test))
+    0.90...
+
+Pipelines also help avoiding another common pitfall: leaking the test data
+into the training data.
+
+.. _data_leakage:
+
+Data leakage
+============
+
+Data leakage occurs when information that would not be available at prediction
+time is used when building the model. This results in overly optimistic
+performance estimates, for example from :ref:`cross-validation
+<cross_validation>`, and thus poorer performance when the model is used
+on actually novel data, for example during production.
+
+A common cause is not keeping the test and train data subsets separate.
+Test data should never be used to make choices about the model.
+**The general rule is to never call** `fit` **on the test data**. While this
+may sound obvious, this is easy to miss in some cases, for example when
+applying certain pre-processing steps.
+
+Although both train and test data subsets should receive the same
+preprocessing transformation (as described in the previous section), it is
+important that these transformations are only learnt from the training data.
+For example, if you have a
+normalization step where you divide by the average value, the average should
+be the average of the train subset, **not** the average of all the data. If the
+test subset is included in the average calculation, information from the test
+subset is influencing the model.
+
+An example of data leakage during preprocessing is detailed below.
+
+Data leakage during pre-processing
+----------------------------------
+
+.. note::
+    We here choose to illustrate data leakage with a feature selection step.
+    This risk of leakage is however relevant with almost all transformations
+    in scikit-learn, including (but not limited to)
+    :class:`~sklearn.preprocessing.StandardScaler`,
+    :class:`~sklearn.impute.SimpleImputer`, and
+    :class:`~sklearn.decomposition.PCA`.
+
+A number of :ref:`feature_selection` functions are available in scikit-learn.
+They can help remove irrelevant, redundant and noisy features as well as
+improve your model build time and performance. As with any other type of
+preprocessing, feature selection should **only** use the training data.
+Including the test data in feature selection will optimistically bias your
+model.
+
+To demonstrate we will create this binary classification problem with
+10,000 randomly generated features::
+
+    >>> import numpy as np
+    >>> n_samples, n_features, n_classes = 200, 10000, 2
+    >>> rng = np.random.RandomState(42)
+    >>> X = rng.standard_normal((n_samples, n_features))
+    >>> y = rng.choice(n_classes, n_samples)
+
+**Wrong**
+
+Using all the data to perform feature selection results in an accuracy score
+much higher than chance, even though our targets are completely random.
+This randomness means that our `X` and `y` are independent and we thus expect
+the accuracy to be around 0.5. However, since the feature selection step
+'sees' the test data, the model has an unfair advantage. In the incorrect
+example below we first use all the data for feature selection and then split
+the data into training and test subsets for model fitting. The result is a
+much higher than expected accuracy score::
+
+    >>> from sklearn.model_selection import train_test_split
+    >>> from sklearn.feature_selection import SelectKBest
+    >>> from sklearn.ensemble import GradientBoostingClassifier
+    >>> from sklearn.metrics import accuracy_score
+
+    >>> # Incorrect preprocessing: the entire data is transformed
+    >>> X_selected = SelectKBest(k=25).fit_transform(X, y)
+
+    >>> X_train, X_test, y_train, y_test = train_test_split(
+    ...     X_selected, y, random_state=42)
+    >>> gbc = GradientBoostingClassifier(random_state=1)
+    >>> gbc.fit(X_train, y_train)
+    GradientBoostingClassifier(random_state=1)
+
+    >>> y_pred = gbc.predict(X_test)
+    >>> accuracy_score(y_test, y_pred)
+    0.76
+
+**Right**
+
+To prevent data leakage, it is good practice to split your data into train
+and test subsets **first**. Feature selection can then be formed using just
+the train dataset. Notice that whenever we use `fit` or `fit_transform`, we
+only use the train dataset. The score is now what we would expect for the
+data, close to chance::
+
+    >>> X_train, X_test, y_train, y_test = train_test_split(
+    ...     X, y, random_state=42)
+    >>> select = SelectKBest(k=25)
+    >>> X_train_selected = select.fit_transform(X_train, y_train)
+
+    >>> gbc = GradientBoostingClassifier(random_state=1)
+    >>> gbc.fit(X_train_selected, y_train)
+    GradientBoostingClassifier(random_state=1)
+
+    >>> X_test_selected = select.transform(X_test)
+    >>> y_pred = gbc.predict(X_test_selected)
+    >>> accuracy_score(y_test, y_pred)
+    0.46
+
+Here again, we recommend using a :class:`~sklearn.pipeline.Pipeline` to chain
+together the feature selection and model estimators. The pipeline ensures
+that only the training data is used when performing `fit` and the test data
+is used only for calculating the accuracy score::
+
+    >>> from sklearn.pipeline import make_pipeline
+    >>> X_train, X_test, y_train, y_test = train_test_split(
+    ...     X, y, random_state=42)
+    >>> pipeline = make_pipeline(SelectKBest(k=25),
+    ...                          GradientBoostingClassifier(random_state=1))
+    >>> pipeline.fit(X_train, y_train)
+    Pipeline(steps=[('selectkbest', SelectKBest(k=25)),
+                    ('gradientboostingclassifier',
+                    GradientBoostingClassifier(random_state=1))])
+
+    >>> y_pred = pipeline.predict(X_test)
+    >>> accuracy_score(y_test, y_pred)
+    0.46
+
+The pipeline can also be fed into a cross-validation
+function such as :func:`~sklearn.model_selection.cross_val_score`.
+Again, the pipeline ensures that the correct data subset and estimator
+method is used during fitting and predicting::
+
+    >>> from sklearn.model_selection import cross_val_score
+    >>> scores = cross_val_score(pipeline, X, y)
+    >>> print(f"Mean accuracy: {scores.mean():.2f}+/-{scores.std():.2f}")
+    Mean accuracy: 0.45+/-0.07
+
+How to avoid data leakage
+-------------------------
+
+Below are some tips on avoiding data leakage:
+
+* Always split the data into train and test subsets first, particularly
+  before any preprocessing steps.
+* Never include test data when using the `fit` and `fit_transform`
+  methods. Using all the data, e.g., `fit(X)`, can result in overly optimistic
+  scores.
+
+  Conversely, the `transform` method should be used on both train and test
+  subsets as the same preprocessing should be applied to all the data.
+  This can be achieved by using `fit_transform` on the train subset and
+  `transform` on the test subset.
+* The scikit-learn :ref:`pipeline <pipeline>` is a great way to prevent data
+  leakage as it ensures that the appropriate method is performed on the
+  correct data subset. The pipeline is ideal for use in cross-validation
+  and hyper-parameter tuning functions.
+
+.. _randomness:
+
+Controlling randomness
+======================
+
+Some scikit-learn objects are inherently random. These are usually estimators
+(e.g. :class:`~sklearn.ensemble.RandomForestClassifier`) and cross-validation
+splitters (e.g. :class:`~sklearn.model_selection.KFold`). The randomness of
+these objects is controlled via their `random_state` parameter, as described
+in the :term:`Glossary <random_state>`. This section expands on the glossary
+entry, and describes good practices and common pitfalls w.r.t. this
+subtle parameter.
+
+.. note:: Recommendation summary
+
+    For an optimal robustness of cross-validation (CV) results, pass
+    `RandomState` instances when creating estimators, or leave `random_state`
+    to `None`. Passing integers to CV splitters is usually the safest option
+    and is preferable; passing `RandomState` instances to splitters may
+    sometimes be useful to achieve very specific use-cases.
+    For both estimators and splitters, passing an integer vs passing an
+    instance (or `None`) leads to subtle but significant differences,
+    especially for CV procedures. These differences are important to
+    understand when reporting results.
+
+    For reproducible results across executions, remove any use of
+    `random_state=None`.
+
+Using `None` or `RandomState` instances, and repeated calls to `fit` and `split`
+--------------------------------------------------------------------------------
+
+The `random_state` parameter determines whether multiple calls to :term:`fit`
+(for estimators) or to :term:`split` (for CV splitters) will produce the same
+results, according to these rules:
+
+- If an integer is passed, calling `fit` or `split` multiple times always
+  yields the same results.
+- If `None` or a `RandomState` instance is passed: `fit` and `split` will
+  yield different results each time they are called, and the succession of
+  calls explores all sources of entropy. `None` is the default value for all
+  `random_state` parameters.
+
+We here illustrate these rules for both estimators and CV splitters.
+
+.. note::
+    Since passing `random_state=None` is equivalent to passing the global
+    `RandomState` instance from `numpy`
+    (`random_state=np.random.mtrand._rand`), we will not explicitly mention
+    `None` here. Everything that applies to instances also applies to using
+    `None`.
+
+Estimators
+..........
+
+Passing instances means that calling `fit` multiple times will not yield the
+same results, even if the estimator is fitted on the same data and with the
+same hyper-parameters::
+
+    >>> from sklearn.linear_model import SGDClassifier
+    >>> from sklearn.datasets import make_classification
+    >>> import numpy as np
+
+    >>> rng = np.random.RandomState(0)
+    >>> X, y = make_classification(n_features=5, random_state=rng)
+    >>> sgd = SGDClassifier(random_state=rng)
+
+    >>> sgd.fit(X, y).coef_
+    array([[ 8.85418642,  4.79084103, -3.13077794,  8.11915045, -0.56479934]])
+
+    >>> sgd.fit(X, y).coef_
+    array([[ 6.70814003,  5.25291366, -7.55212743,  5.18197458,  1.37845099]])
+
+We can see from the snippet above that repeatedly calling `sgd.fit` has
+produced different models, even if the data was the same. This is because the
+Random Number Generator (RNG) of the estimator is consumed (i.e. mutated)
+when `fit` is called, and this mutated RNG will be used in the subsequent
+calls to `fit`. In addition, the `rng` object is shared across all objects
+that use it, and as a consequence, these objects become somewhat
+inter-dependent. For example, two estimators that share the same
+`RandomState` instance will influence each other, as we will see later when
+we discuss cloning. This point is important to keep in mind when debugging.
+
+If we had passed an integer to the `random_state` parameter of the
+:class:`~sklearn.linear_model.SGDClassifier`, we would have obtained the
+same models, and thus the same scores each time. When we pass an integer, the
+same RNG is used across all calls to `fit`. What internally happens is that
+even though the RNG is consumed when `fit` is called, it is always reset to
+its original state at the beginning of `fit`.
+
+CV splitters
+............
+
+Randomized CV splitters have a similar behavior when a `RandomState`
+instance is passed; calling `split` multiple times yields different data
+splits::
+
+    >>> from sklearn.model_selection import KFold
+    >>> import numpy as np
+
+    >>> X = y = np.arange(10)
+    >>> rng = np.random.RandomState(0)
+    >>> cv = KFold(n_splits=2, shuffle=True, random_state=rng)
+
+    >>> for train, test in cv.split(X, y):
+    ...     print(train, test)
+    [0 3 5 6 7] [1 2 4 8 9]
+    [1 2 4 8 9] [0 3 5 6 7]
+
+    >>> for train, test in cv.split(X, y):
+    ...     print(train, test)
+    [0 4 6 7 8] [1 2 3 5 9]
+    [1 2 3 5 9] [0 4 6 7 8]
+
+We can see that the splits are different from the second time `split` is
+called. This may lead to unexpected results if you compare the performance of
+multiple estimators by calling `split` many times, as we will see in the next
+section.
+
+Common pitfalls and subtleties
+------------------------------
+
+While the rules that govern the `random_state` parameter are seemingly simple,
+they do however have some subtle implications. In some cases, this can even
+lead to wrong conclusions.
+
+Estimators
+..........
+
+**Different `random_state` types lead to different cross-validation
+procedures**
+
+Depending on the type of the `random_state` parameter, estimators will behave
+differently, especially in cross-validation procedures. Consider the
+following snippet::
+
+    >>> from sklearn.ensemble import RandomForestClassifier
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.model_selection import cross_val_score
+    >>> import numpy as np
+
+    >>> X, y = make_classification(random_state=0)
+
+    >>> rf_123 = RandomForestClassifier(random_state=123)
+    >>> cross_val_score(rf_123, X, y)
+    array([0.85, 0.95, 0.95, 0.9 , 0.9 ])
+
+    >>> rf_inst = RandomForestClassifier(random_state=np.random.RandomState(0))
+    >>> cross_val_score(rf_inst, X, y)
+    array([0.9 , 0.95, 0.95, 0.9 , 0.9 ])
+
+We see that the cross-validated scores of `rf_123` and `rf_inst` are
+different, as should be expected since we didn't pass the same `random_state`
+parameter. However, the difference between these scores is more subtle than
+it looks, and **the cross-validation procedures that were performed by**
+:func:`~sklearn.model_selection.cross_val_score` **significantly differ in
+each case**:
+
+- Since `rf_123` was passed an integer, every call to `fit` uses the same RNG:
+  this means that all random characteristics of the random forest estimator
+  will be the same for each of the 5 folds of the CV procedure. In
+  particular, the (randomly chosen) subset of features of the estimator will
+  be the same across all folds.
+- Since `rf_inst` was passed a `RandomState` instance, each call to `fit`
+  starts from a different RNG. As a result, the random subset of features
+  will be different for each fold.
+
+While having a constant estimator RNG across folds isn't inherently wrong, we
+usually want CV results that are robust w.r.t. the estimator's randomness. As
+a result, passing an instance instead of an integer may be preferable, since
+it will allow the estimator RNG to vary for each fold.
+
+.. note::
+    Here, :func:`~sklearn.model_selection.cross_val_score` will use a
+    non-randomized CV splitter (as is the default), so both estimators will
+    be evaluated on the same splits. This section is not about variability in
+    the splits. Also, whether we pass an integer or an instance to
+    :func:`~sklearn.datasets.make_classification` isn't relevant for our
+    illustration purpose: what matters is what we pass to the
+    :class:`~sklearn.ensemble.RandomForestClassifier` estimator.
+
+**Cloning**
+
+Another subtle side effect of passing `RandomState` instances is how
+:func:`~sklearn.clone` will work::
+
+    >>> from sklearn import clone
+    >>> from sklearn.ensemble import RandomForestClassifier
+    >>> import numpy as np
+
+    >>> rng = np.random.RandomState(0)
+    >>> a = RandomForestClassifier(random_state=rng)
+    >>> b = clone(a)
+
+Since a `RandomState` instance was passed to `a`, `a` and `b` are not clones
+in the strict sense, but rather clones in the statistical sense: `a` and `b`
+will still be different models, even when calling `fit(X, y)` on the same
+data. Moreover, `a` and `b` will influence each-other since they share the
+same internal RNG: calling `a.fit` will consume `b`'s RNG, and calling
+`b.fit` will consume `a`'s RNG, since they are the same. This bit is true for
+any estimators that share a `random_state` parameter; it is not specific to
+clones.
+
+If an integer were passed, `a` and `b` would be exact clones and they would not
+influence each other.
+
+.. warning::
+    Even though :func:`~sklearn.clone` is rarely used in user code, it is
+    called pervasively throughout scikit-learn codebase: in particular, most
+    meta-estimators that accept non-fitted estimators call
+    :func:`~sklearn.clone` internally
+    (:class:`~sklearn.model_selection.GridSearchCV`,
+    :class:`~sklearn.ensemble.StackingClassifier`,
+    :class:`~sklearn.calibration.CalibratedClassifierCV`, etc.).
+
+CV splitters
+............
+
+When passed a `RandomState` instance, CV splitters yield different splits
+each time `split` is called. When comparing different estimators, this can
+lead to overestimating the variance of the difference in performance between
+the estimators::
+
+    >>> from sklearn.naive_bayes import GaussianNB
+    >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.model_selection import KFold
+    >>> from sklearn.model_selection import cross_val_score
+    >>> import numpy as np
+
+    >>> rng = np.random.RandomState(0)
+    >>> X, y = make_classification(random_state=rng)
+    >>> cv = KFold(shuffle=True, random_state=rng)
+    >>> lda = LinearDiscriminantAnalysis()
+    >>> nb = GaussianNB()
+
+    >>> for est in (lda, nb):
+    ...     print(cross_val_score(est, X, y, cv=cv))
+    [0.8  0.75 0.75 0.7  0.85]
+    [0.85 0.95 0.95 0.85 0.95]
+
+
+Directly comparing the performance of the
+:class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis` estimator
+vs the :class:`~sklearn.naive_bayes.GaussianNB` estimator **on each fold** would
+be a mistake: **the splits on which the estimators are evaluated are
+different**. Indeed, :func:`~sklearn.model_selection.cross_val_score` will
+internally call `cv.split` on the same
+:class:`~sklearn.model_selection.KFold` instance, but the splits will be
+different each time. This is also true for any tool that performs model
+selection via cross-validation, e.g.
+:class:`~sklearn.model_selection.GridSearchCV` and
+:class:`~sklearn.model_selection.RandomizedSearchCV`: scores are not
+comparable fold-to-fold across different calls to `search.fit`, since
+`cv.split` would have been called multiple times. Within a single call to
+`search.fit`, however, fold-to-fold comparison is possible since the search
+estimator only calls `cv.split` once.
+
+For comparable fold-to-fold results in all scenarios, one should pass an
+integer to the CV splitter: `cv = KFold(shuffle=True, random_state=0)`.
+
+.. note::
+    While fold-to-fold comparison is not advisable with `RandomState`
+    instances, one can however expect that average scores allow one to
+    conclude whether one estimator is better than another, as long as enough
+    folds and data are used.
+
+.. note::
+    What matters in this example is what was passed to
+    :class:`~sklearn.model_selection.KFold`. Whether we pass a `RandomState`
+    instance or an integer to :func:`~sklearn.datasets.make_classification`
+    is not relevant for our illustration purpose. Also, neither
+    :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis` nor
+    :class:`~sklearn.naive_bayes.GaussianNB` are randomized estimators.
+
+General recommendations
+-----------------------
+
+Getting reproducible results across multiple executions
+.......................................................
+
+In order to obtain reproducible (i.e. constant) results across multiple
+*program executions*, we need to remove all uses of `random_state=None`, which
+is the default. The recommended way is to declare a `rng` variable at the top
+of the program, and pass it down to any object that accepts a `random_state`
+parameter::
+
+    >>> from sklearn.ensemble import RandomForestClassifier
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.model_selection import train_test_split
+    >>> import numpy as np
+
+    >>> rng = np.random.RandomState(0)
+    >>> X, y = make_classification(random_state=rng)
+    >>> rf = RandomForestClassifier(random_state=rng)
+    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
+    ...                                                     random_state=rng)
+    >>> rf.fit(X_train, y_train).score(X_test, y_test)
+    0.84
+
+We are now guaranteed that the result of this script will always be 0.84, no
+matter how many times we run it. Changing the global `rng` variable to a
+different value should affect the results, as expected.
+
+It is also possible to declare the `rng` variable as an integer. This may
+however lead to less robust cross-validation results, as we will see in the
+next section.
+
+.. note::
+    We do not recommend setting the global `numpy` seed by calling
+    `np.random.seed(0)`. See `here
+    <https://stackoverflow.com/questions/5836335/consistently-create-same-random-numpy-array/5837352#comment6712034_5837352>`_
+    for a discussion.
+
+Robustness of cross-validation results
+......................................
+
+When we evaluate a randomized estimator performance by cross-validation, we
+want to make sure that the estimator can yield accurate predictions for new
+data, but we also want to make sure that the estimator is robust w.r.t. its
+random initialization. For example, we would like the random weights
+initialization of a :class:`~sklearn.linear_model.SGDClassifier` to be
+consistently good across all folds: otherwise, when we train that estimator
+on new data, we might get unlucky and the random initialization may lead to
+bad performance. Similarly, we want a random forest to be robust w.r.t. the
+set of randomly selected features that each tree will be using.
+
+For these reasons, it is preferable to evaluate the cross-validation
+performance by letting the estimator use a different RNG on each fold. This
+is done by passing a `RandomState` instance (or `None`) to the estimator
+initialization.
+
+When we pass an integer, the estimator will use the same RNG on each fold:
+if the estimator performs well (or badly), as evaluated by CV, it might just
+be because we got lucky (or unlucky) with that specific seed. Passing
+instances leads to more robust CV results, and makes the comparison between
+various algorithms fairer. It also helps limit the temptation to treat the
+estimator's RNG as a hyper-parameter that can be tuned.
+
+Whether we pass `RandomState` instances or integers to CV splitters has no
+impact on robustness, as long as `split` is only called once. When `split`
+is called multiple times, fold-to-fold comparison isn't possible anymore. As
+a result, passing an integer to CV splitters is usually safer and covers
+most use-cases.
diff -pruN 0.23.2-5/doc/communication_team_emeritus.rst 1.1.1-1/doc/communication_team_emeritus.rst
--- 0.23.2-5/doc/communication_team_emeritus.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/communication_team_emeritus.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1 @@
+- Reshama Shaikh
\ No newline at end of file
diff -pruN 0.23.2-5/doc/communication_team.rst 1.1.1-1/doc/communication_team.rst
--- 0.23.2-5/doc/communication_team.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/communication_team.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,16 @@
+.. raw :: html
+
+    <!-- Generated by generate_authors_table.py -->
+    <div class="sk-authors-container">
+    <style>
+      img.avatar {border-radius: 10px;}
+    </style>
+    <div>
+    <a href='https://github.com/laurburke'><img src='https://avatars.githubusercontent.com/u/35973528?v=4' class='avatar' /></a> <br />
+    <p>Lauren Burke</p>
+    </div>
+    <div>
+    <a href='https://github.com/francoisgoupil'><img src='https://avatars.githubusercontent.com/u/98105626?v=4' class='avatar' /></a> <br />
+    <p>francoisgoupil</p>
+    </div>
+    </div>
\ No newline at end of file
diff -pruN 0.23.2-5/doc/computing/computational_performance.rst 1.1.1-1/doc/computing/computational_performance.rst
--- 0.23.2-5/doc/computing/computational_performance.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/computing/computational_performance.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,370 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+.. _computational_performance:
+
+.. currentmodule:: sklearn
+
+Computational Performance
+=========================
+
+For some applications the performance (mainly latency and throughput at
+prediction time) of estimators is crucial. It may also be of interest to
+consider the training throughput but this is often less important in a
+production setup (where it often takes place offline).
+
+We will review here the orders of magnitude you can expect from a number of
+scikit-learn estimators in different contexts and provide some tips and
+tricks for overcoming performance bottlenecks.
+
+Prediction latency is measured as the elapsed time necessary to make a
+prediction (e.g. in micro-seconds). Latency is often viewed as a distribution
+and operations engineers often focus on the latency at a given percentile of
+this distribution (e.g. the 90 percentile).
+
+Prediction throughput is defined as the number of predictions the software can
+deliver in a given amount of time (e.g. in predictions per second).
+
+An important aspect of performance optimization is also that it can hurt
+prediction accuracy. Indeed, simpler models (e.g. linear instead of
+non-linear, or with fewer parameters) often run faster but are not always able
+to take into account the same exact properties of the data as more complex ones.
+
+Prediction Latency
+------------------
+
+One of the most straight-forward concerns one may have when using/choosing a
+machine learning toolkit is the latency at which predictions can be made in a
+production environment.
+
+The main factors that influence the prediction latency are
+  1. Number of features
+  2. Input data representation and sparsity
+  3. Model complexity
+  4. Feature extraction
+
+A last major parameter is also the possibility to do predictions in bulk or
+one-at-a-time mode.
+
+Bulk versus Atomic mode
+........................
+
+In general doing predictions in bulk (many instances at the same time) is
+more efficient for a number of reasons (branching predictability, CPU cache,
+linear algebra libraries optimizations etc.). Here we see on a setting
+with few features that independently of estimator choice the bulk mode is
+always faster, and for some of them by 1 to 2 orders of magnitude:
+
+.. |atomic_prediction_latency| image::  ../auto_examples/applications/images/sphx_glr_plot_prediction_latency_001.png
+    :target: ../auto_examples/applications/plot_prediction_latency.html
+    :scale: 80
+
+.. centered:: |atomic_prediction_latency|
+
+.. |bulk_prediction_latency| image::  ../auto_examples/applications/images/sphx_glr_plot_prediction_latency_002.png
+    :target: ../auto_examples/applications/plot_prediction_latency.html
+    :scale: 80
+
+.. centered:: |bulk_prediction_latency|
+
+To benchmark different estimators for your case you can simply change the
+``n_features`` parameter in this example:
+:ref:`sphx_glr_auto_examples_applications_plot_prediction_latency.py`. This should give
+you an estimate of the order of magnitude of the prediction latency.
+
+Configuring Scikit-learn for reduced validation overhead
+.........................................................
+
+Scikit-learn does some validation on data that increases the overhead per
+call to ``predict`` and similar functions. In particular, checking that
+features are finite (not NaN or infinite) involves a full pass over the
+data. If you ensure that your data is acceptable, you may suppress
+checking for finiteness by setting the environment variable
+``SKLEARN_ASSUME_FINITE`` to a non-empty string before importing
+scikit-learn, or configure it in Python with :func:`set_config`.
+For more control than these global settings, a :func:`config_context`
+allows you to set this configuration within a specified context::
+
+  >>> import sklearn
+  >>> with sklearn.config_context(assume_finite=True):
+  ...     pass  # do learning/prediction here with reduced validation
+
+Note that this will affect all uses of
+:func:`~utils.assert_all_finite` within the context.
+
+Influence of the Number of Features
+....................................
+
+Obviously when the number of features increases so does the memory
+consumption of each example. Indeed, for a matrix of :math:`M` instances
+with :math:`N` features, the space complexity is in :math:`O(NM)`.
+From a computing perspective it also means that the number of basic operations
+(e.g., multiplications for vector-matrix products in linear models) increases
+too. Here is a graph of the evolution of the prediction latency with the
+number of features:
+
+.. |influence_of_n_features_on_latency| image::  ../auto_examples/applications/images/sphx_glr_plot_prediction_latency_003.png
+    :target: ../auto_examples/applications/plot_prediction_latency.html
+    :scale: 80
+
+.. centered:: |influence_of_n_features_on_latency|
+
+Overall you can expect the prediction time to increase at least linearly with
+the number of features (non-linear cases can happen depending on the global
+memory footprint and estimator).
+
+Influence of the Input Data Representation
+...........................................
+
+Scipy provides sparse matrix data structures which are optimized for storing
+sparse data. The main feature of sparse formats is that you don't store zeros
+so if your data is sparse then you use much less memory. A non-zero value in
+a sparse (`CSR or CSC <https://docs.scipy.org/doc/scipy/reference/sparse.html>`_)
+representation will only take on average one 32bit integer position + the 64
+bit floating point value + an additional 32bit per row or column in the matrix.
+Using sparse input on a dense (or sparse) linear model can speedup prediction
+by quite a bit as only the non zero valued features impact the dot product
+and thus the model predictions. Hence if you have 100 non zeros in 1e6
+dimensional space, you only need 100 multiply and add operation instead of 1e6.
+
+Calculation over a dense representation, however, may leverage highly optimized
+vector operations and multithreading in BLAS, and tends to result in fewer CPU
+cache misses. So the sparsity should typically be quite high (10% non-zeros
+max, to be checked depending on the hardware) for the sparse input
+representation to be faster than the dense input representation on a machine
+with many CPUs and an optimized BLAS implementation.
+
+Here is sample code to test the sparsity of your input::
+
+    def sparsity_ratio(X):
+        return 1.0 - np.count_nonzero(X) / float(X.shape[0] * X.shape[1])
+    print("input sparsity ratio:", sparsity_ratio(X))
+
+As a rule of thumb you can consider that if the sparsity ratio is greater
+than 90% you can probably benefit from sparse formats. Check Scipy's sparse
+matrix formats `documentation <https://docs.scipy.org/doc/scipy/reference/sparse.html>`_
+for more information on how to build (or convert your data to) sparse matrix
+formats. Most of the time the ``CSR`` and ``CSC`` formats work best.
+
+Influence of the Model Complexity
+..................................
+
+Generally speaking, when model complexity increases, predictive power and
+latency are supposed to increase. Increasing predictive power is usually
+interesting, but for many applications we would better not increase
+prediction latency too much. We will now review this idea for different
+families of supervised models.
+
+For :mod:`sklearn.linear_model` (e.g. Lasso, ElasticNet,
+SGDClassifier/Regressor, Ridge & RidgeClassifier,
+PassiveAggressiveClassifier/Regressor, LinearSVC, LogisticRegression...) the
+decision function that is applied at prediction time is the same (a dot
+product), so latency should be equivalent.
+
+Here is an example using
+:class:`~linear_model.SGDClassifier` with the
+``elasticnet`` penalty. The regularization strength is globally controlled by
+the ``alpha`` parameter. With a sufficiently high ``alpha``,
+one can then increase the ``l1_ratio`` parameter of ``elasticnet`` to
+enforce various levels of sparsity in the model coefficients. Higher sparsity
+here is interpreted as less model complexity as we need fewer coefficients to
+describe it fully. Of course sparsity influences in turn the prediction time
+as the sparse dot-product takes time roughly proportional to the number of
+non-zero coefficients.
+
+.. |en_model_complexity| image::  ../auto_examples/applications/images/sphx_glr_plot_model_complexity_influence_001.png
+    :target: ../auto_examples/applications/plot_model_complexity_influence.html
+    :scale: 80
+
+.. centered:: |en_model_complexity|
+
+For the :mod:`sklearn.svm` family of algorithms with a non-linear kernel,
+the latency is tied to the number of support vectors (the fewer the faster).
+Latency and throughput should (asymptotically) grow linearly with the number
+of support vectors in a SVC or SVR model. The kernel will also influence the
+latency as it is used to compute the projection of the input vector once per
+support vector. In the following graph the ``nu`` parameter of
+:class:`~svm.NuSVR` was used to influence the number of
+support vectors.
+
+.. |nusvr_model_complexity| image::  ../auto_examples/applications/images/sphx_glr_plot_model_complexity_influence_002.png
+    :target: ../auto_examples/applications/plot_model_complexity_influence.html
+    :scale: 80
+
+.. centered:: |nusvr_model_complexity|
+
+For :mod:`sklearn.ensemble` of trees (e.g. RandomForest, GBT,
+ExtraTrees etc) the number of trees and their depth play the most
+important role. Latency and throughput should scale linearly with the number
+of trees. In this case we used directly the ``n_estimators`` parameter of
+:class:`~ensemble.GradientBoostingRegressor`.
+
+.. |gbt_model_complexity| image::  ../auto_examples/applications/images/sphx_glr_plot_model_complexity_influence_003.png
+    :target: ../auto_examples/applications/plot_model_complexity_influence.html
+    :scale: 80
+
+.. centered:: |gbt_model_complexity|
+
+In any case be warned that decreasing model complexity can hurt accuracy as
+mentioned above. For instance a non-linearly separable problem can be handled
+with a speedy linear model but prediction power will very likely suffer in
+the process.
+
+Feature Extraction Latency
+..........................
+
+Most scikit-learn models are usually pretty fast as they are implemented
+either with compiled Cython extensions or optimized computing libraries.
+On the other hand, in many real world applications the feature extraction
+process (i.e. turning raw data like database rows or network packets into
+numpy arrays) governs the overall prediction time. For example on the Reuters
+text classification task the whole preparation (reading and parsing SGML
+files, tokenizing the text and hashing it into a common vector space) is
+taking 100 to 500 times more time than the actual prediction code, depending on
+the chosen model.
+
+.. |prediction_time| image::  ../auto_examples/applications/images/sphx_glr_plot_out_of_core_classification_004.png
+    :target: ../auto_examples/applications/plot_out_of_core_classification.html
+    :scale: 80
+
+.. centered:: |prediction_time|
+
+In many cases it is thus recommended to carefully time and profile your
+feature extraction code as it may be a good place to start optimizing when
+your overall latency is too slow for your application.
+
+Prediction Throughput
+----------------------
+
+Another important metric to care about when sizing production systems is the
+throughput i.e. the number of predictions you can make in a given amount of
+time. Here is a benchmark from the
+:ref:`sphx_glr_auto_examples_applications_plot_prediction_latency.py` example that measures
+this quantity for a number of estimators on synthetic data:
+
+.. |throughput_benchmark| image::  ../auto_examples/applications/images/sphx_glr_plot_prediction_latency_004.png
+    :target: ../auto_examples/applications/plot_prediction_latency.html
+    :scale: 80
+
+.. centered:: |throughput_benchmark|
+
+These throughputs are achieved on a single process. An obvious way to
+increase the throughput of your application is to spawn additional instances
+(usually processes in Python because of the
+`GIL <https://wiki.python.org/moin/GlobalInterpreterLock>`_) that share the
+same model. One might also add machines to spread the load. A detailed
+explanation on how to achieve this is beyond the scope of this documentation
+though.
+
+Tips and Tricks
+----------------
+
+Linear algebra libraries
+.........................
+
+As scikit-learn relies heavily on Numpy/Scipy and linear algebra in general it
+makes sense to take explicit care of the versions of these libraries.
+Basically, you ought to make sure that Numpy is built using an optimized `BLAS
+<https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms>`_ /
+`LAPACK <https://en.wikipedia.org/wiki/LAPACK>`_ library.
+
+Not all models benefit from optimized BLAS and Lapack implementations. For
+instance models based on (randomized) decision trees typically do not rely on
+BLAS calls in their inner loops, nor do kernel SVMs (``SVC``, ``SVR``,
+``NuSVC``, ``NuSVR``).  On the other hand a linear model implemented with a
+BLAS DGEMM call (via ``numpy.dot``) will typically benefit hugely from a tuned
+BLAS implementation and lead to orders of magnitude speedup over a
+non-optimized BLAS.
+
+You can display the BLAS / LAPACK implementation used by your NumPy / SciPy /
+scikit-learn install with the following commands::
+
+    from numpy.distutils.system_info import get_info
+    print(get_info('blas_opt'))
+    print(get_info('lapack_opt'))
+
+Optimized BLAS / LAPACK implementations include:
+ - Atlas (need hardware specific tuning by rebuilding on the target machine)
+ - OpenBLAS
+ - MKL
+ - Apple Accelerate and vecLib frameworks (OSX only)
+
+More information can be found on the `Scipy install page <https://docs.scipy.org/doc/numpy/user/install.html>`_
+and in this
+`blog post <http://danielnouri.org/notes/2012/12/19/libblas-and-liblapack-issues-and-speed,-with-scipy-and-ubuntu/>`_
+from Daniel Nouri which has some nice step by step install instructions for
+Debian / Ubuntu.
+
+.. _working_memory:
+
+Limiting Working Memory
+........................
+
+Some calculations when implemented using standard numpy vectorized operations
+involve using a large amount of temporary memory.  This may potentially exhaust
+system memory.  Where computations can be performed in fixed-memory chunks, we
+attempt to do so, and allow the user to hint at the maximum size of this
+working memory (defaulting to 1GB) using :func:`set_config` or
+:func:`config_context`.  The following suggests to limit temporary working
+memory to 128 MiB::
+
+  >>> import sklearn
+  >>> with sklearn.config_context(working_memory=128):
+  ...     pass  # do chunked work here
+
+An example of a chunked operation adhering to this setting is
+:func:`~metrics.pairwise_distances_chunked`, which facilitates computing
+row-wise reductions of a pairwise distance matrix.
+
+Model Compression
+..................
+
+Model compression in scikit-learn only concerns linear models for the moment.
+In this context it means that we want to control the model sparsity (i.e. the
+number of non-zero coordinates in the model vectors). It is generally a good
+idea to combine model sparsity with sparse input data representation.
+
+Here is sample code that illustrates the use of the ``sparsify()`` method::
+
+    clf = SGDRegressor(penalty='elasticnet', l1_ratio=0.25)
+    clf.fit(X_train, y_train).sparsify()
+    clf.predict(X_test)
+
+In this example we prefer the ``elasticnet`` penalty as it is often a good
+compromise between model compactness and prediction power. One can also
+further tune the ``l1_ratio`` parameter (in combination with the
+regularization strength ``alpha``) to control this tradeoff.
+
+A typical `benchmark <https://github.com/scikit-learn/scikit-learn/blob/main/benchmarks/bench_sparsify.py>`_
+on synthetic data yields a >30% decrease in latency when both the model and
+input are sparse (with 0.000024 and 0.027400 non-zero coefficients ratio
+respectively). Your mileage may vary depending on the sparsity and size of
+your data and model.
+Furthermore, sparsifying can be very useful to reduce the memory usage of
+predictive models deployed on production servers.
+
+Model Reshaping
+................
+
+Model reshaping consists in selecting only a portion of the available features
+to fit a model. In other words, if a model discards features during the
+learning phase we can then strip those from the input. This has several
+benefits. Firstly it reduces memory (and therefore time) overhead of the
+model itself. It also allows to discard explicit
+feature selection components in a pipeline once we know which features to
+keep from a previous run. Finally, it can help reduce processing time and I/O
+usage upstream in the data access and feature extraction layers by not
+collecting and building features that are discarded by the model. For instance
+if the raw data come from a database, it can make it possible to write simpler
+and faster queries or reduce I/O usage by making the queries return lighter
+records.
+At the moment, reshaping needs to be performed manually in scikit-learn.
+In the case of sparse input (particularly in ``CSR`` format), it is generally
+sufficient to not generate the relevant features, leaving their columns empty.
+
+Links
+......
+
+  - :ref:`scikit-learn developer performance documentation <performance-howto>`
+  - `Scipy sparse matrix formats documentation <https://docs.scipy.org/doc/scipy/reference/sparse.html>`_
diff -pruN 0.23.2-5/doc/computing/parallelism.rst 1.1.1-1/doc/computing/parallelism.rst
--- 0.23.2-5/doc/computing/parallelism.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/computing/parallelism.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,279 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+Parallelism, resource management, and configuration
+===================================================
+
+.. _parallelism:
+
+Parallelism
+-----------
+
+Some scikit-learn estimators and utilities can parallelize costly operations
+using multiple CPU cores, thanks to the following components:
+
+- via the `joblib <https://joblib.readthedocs.io/en/latest/>`_ library. In
+  this case the number of threads or processes can be controlled with the
+  ``n_jobs`` parameter.
+- via OpenMP, used in C or Cython code.
+
+In addition, some of the numpy routines that are used internally by
+scikit-learn may also be parallelized if numpy is installed with specific
+numerical libraries such as MKL, OpenBLAS, or BLIS.
+
+We describe these 3 scenarios in the following subsections.
+
+Joblib-based parallelism
+........................
+
+When the underlying implementation uses joblib, the number of workers
+(threads or processes) that are spawned in parallel can be controlled via the
+``n_jobs`` parameter.
+
+.. note::
+
+    Where (and how) parallelization happens in the estimators is currently
+    poorly documented. Please help us by improving our docs and tackle `issue
+    14228 <https://github.com/scikit-learn/scikit-learn/issues/14228>`_!
+
+Joblib is able to support both multi-processing and multi-threading. Whether
+joblib chooses to spawn a thread or a process depends on the **backend**
+that it's using.
+
+Scikit-learn generally relies on the ``loky`` backend, which is joblib's
+default backend. Loky is a multi-processing backend. When doing
+multi-processing, in order to avoid duplicating the memory in each process
+(which isn't reasonable with big datasets), joblib will create a `memmap
+<https://docs.scipy.org/doc/numpy/reference/generated/numpy.memmap.html>`_
+that all processes can share, when the data is bigger than 1MB.
+
+In some specific cases (when the code that is run in parallel releases the
+GIL), scikit-learn will indicate to ``joblib`` that a multi-threading
+backend is preferable.
+
+As a user, you may control the backend that joblib will use (regardless of
+what scikit-learn recommends) by using a context manager::
+
+    from joblib import parallel_backend
+
+    with parallel_backend('threading', n_jobs=2):
+        # Your scikit-learn code here
+
+Please refer to the `joblib's docs
+<https://joblib.readthedocs.io/en/latest/parallel.html#thread-based-parallelism-vs-process-based-parallelism>`_
+for more details.
+
+In practice, whether parallelism is helpful at improving runtime depends on
+many factors. It is usually a good idea to experiment rather than assuming
+that increasing the number of workers is always a good thing. In some cases
+it can be highly detrimental to performance to run multiple copies of some
+estimators or functions in parallel (see oversubscription below).
+
+OpenMP-based parallelism
+........................
+
+OpenMP is used to parallelize code written in Cython or C, relying on
+multi-threading exclusively. By default (and unless joblib is trying to
+avoid oversubscription), the implementation will use as many threads as
+possible.
+
+You can control the exact number of threads that are used via the
+``OMP_NUM_THREADS`` environment variable:
+
+.. prompt:: bash $
+
+    OMP_NUM_THREADS=4 python my_script.py
+
+Parallel Numpy routines from numerical libraries
+................................................
+
+Scikit-learn relies heavily on NumPy and SciPy, which internally call
+multi-threaded linear algebra routines implemented in libraries such as MKL,
+OpenBLAS or BLIS.
+
+The number of threads used by the OpenBLAS, MKL or BLIS libraries can be set
+via the ``MKL_NUM_THREADS``, ``OPENBLAS_NUM_THREADS``, and
+``BLIS_NUM_THREADS`` environment variables.
+
+Please note that scikit-learn has no direct control over these
+implementations. Scikit-learn solely relies on Numpy and Scipy.
+
+.. note::
+    At the time of writing (2019), NumPy and SciPy packages distributed on
+    pypi.org (used by ``pip``) and on the conda-forge channel are linked
+    with OpenBLAS, while conda packages shipped on the "defaults" channel
+    from anaconda.org are linked by default with MKL.
+
+
+Oversubscription: spawning too many threads
+...........................................
+
+It is generally recommended to avoid using significantly more processes or
+threads than the number of CPUs on a machine. Over-subscription happens when
+a program is running too many threads at the same time.
+
+Suppose you have a machine with 8 CPUs. Consider a case where you're running
+a :class:`~sklearn.model_selection.GridSearchCV` (parallelized with joblib)
+with ``n_jobs=8`` over a
+:class:`~sklearn.ensemble.HistGradientBoostingClassifier` (parallelized with
+OpenMP). Each instance of
+:class:`~sklearn.ensemble.HistGradientBoostingClassifier` will spawn 8 threads
+(since you have 8 CPUs). That's a total of ``8 * 8 = 64`` threads, which
+leads to oversubscription of physical CPU resources and to scheduling
+overhead.
+
+Oversubscription can arise in the exact same fashion with parallelized
+routines from MKL, OpenBLAS or BLIS that are nested in joblib calls.
+
+Starting from ``joblib >= 0.14``, when the ``loky`` backend is used (which
+is the default), joblib will tell its child **processes** to limit the
+number of threads they can use, so as to avoid oversubscription. In practice
+the heuristic that joblib uses is to tell the processes to use ``max_threads
+= n_cpus // n_jobs``, via their corresponding environment variable. Back to
+our example from above, since the joblib backend of
+:class:`~sklearn.model_selection.GridSearchCV` is ``loky``, each process will
+only be able to use 1 thread instead of 8, thus mitigating the
+oversubscription issue.
+
+Note that:
+
+- Manually setting one of the environment variables (``OMP_NUM_THREADS``,
+  ``MKL_NUM_THREADS``, ``OPENBLAS_NUM_THREADS``, or ``BLIS_NUM_THREADS``)
+  will take precedence over what joblib tries to do. The total number of
+  threads will be ``n_jobs * <LIB>_NUM_THREADS``. Note that setting this
+  limit will also impact your computations in the main process, which will
+  only use ``<LIB>_NUM_THREADS``. Joblib exposes a context manager for
+  finer control over the number of threads in its workers (see joblib docs
+  linked below).
+- Joblib is currently unable to avoid oversubscription in a
+  multi-threading context. It can only do so with the ``loky`` backend
+  (which spawns processes).
+
+You will find additional details about joblib mitigation of oversubscription
+in `joblib documentation
+<https://joblib.readthedocs.io/en/latest/parallel.html#avoiding-over-subscription-of-cpu-ressources>`_.
+
+
+Configuration switches
+-----------------------
+
+Python runtime
+..............
+
+:func:`sklearn.set_config` controls the following behaviors:
+
+`assume_finite`
+~~~~~~~~~~~~~~~
+
+Used to skip validation, which enables faster computations but may lead to
+segmentation faults if the data contains NaNs.
+
+`working_memory`
+~~~~~~~~~~~~~~~~
+
+The optimal size of temporary arrays used by some algorithms.
+
+.. _environment_variable:
+
+Environment variables
+......................
+
+These environment variables should be set before importing scikit-learn.
+
+`SKLEARN_ASSUME_FINITE`
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Sets the default value for the `assume_finite` argument of
+:func:`sklearn.set_config`.
+
+`SKLEARN_WORKING_MEMORY`
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Sets the default value for the `working_memory` argument of
+:func:`sklearn.set_config`.
+
+`SKLEARN_SEED`
+~~~~~~~~~~~~~~
+
+Sets the seed of the global random generator when running the tests, for
+reproducibility.
+
+Note that scikit-learn tests are expected to run deterministically with
+explicit seeding of their own independent RNG instances instead of relying on
+the numpy or Python standard library RNG singletons to make sure that test
+results are independent of the test execution order. However some tests might
+forget to use explicit seeding and this variable is a way to control the initial
+state of the aforementioned singletons.
+
+`SKLEARN_TESTS_GLOBAL_RANDOM_SEED`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Controls the seeding of the random number generator used in tests that rely on
+the `global_random_seed` fixture.
+
+All tests that use this fixture accept the contract that they should
+deterministically pass for any seed value from 0 to 99 included.
+
+If the `SKLEARN_TESTS_GLOBAL_RANDOM_SEED` environment variable is set to
+`"any"` (which should be the case on nightly builds on the CI), the fixture
+will choose an arbitrary seed in the above range (based on the BUILD_NUMBER or
+the current day) and all fixtured tests will run for that specific seed. The
+goal is to ensure that, over time, our CI will run all tests with different
+seeds while keeping the test duration of a single run of the full test suite
+limited. This will check that the assertions of tests written to use this
+fixture are not dependent on a specific seed value.
+
+The range of admissible seed values is limited to [0, 99] because it is often
+not possible to write a test that can work for any possible seed and we want to
+avoid having tests that randomly fail on the CI.
+
+Valid values for `SKLEARN_TESTS_GLOBAL_RANDOM_SEED`:
+
+- `SKLEARN_TESTS_GLOBAL_RANDOM_SEED="42"`: run tests with a fixed seed of 42
+- `SKLEARN_TESTS_GLOBAL_RANDOM_SEED="40-42"`: run the tests with all seeds
+  between 40 and 42 included
+- `SKLEARN_TESTS_GLOBAL_RANDOM_SEED="any"`: run the tests with an arbitrary
+  seed selected between 0 and 99 included
+- `SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all"`: run the tests with all seeds
+  between 0 and 99 included. This can take a long time: only use for individual
+  tests, not the full test suite!
+
+If the variable is not set, then 42 is used as the global seed in a
+deterministic manner. This ensures that, by default, the scikit-learn test
+suite is as deterministic as possible to avoid disrupting our friendly
+third-party package maintainers. Similarly, this variable should not be set in
+the CI config of pull-requests to make sure that our friendly contributors are
+not the first people to encounter a seed-sensitivity regression in a test
+unrelated to the changes of their own PR. Only the scikit-learn maintainers who
+watch the results of the nightly builds are expected to be annoyed by this.
+
+When writing a new test function that uses this fixture, please use the
+following command to make sure that it passes deterministically for all
+admissible seeds on your local machine:
+
+.. prompt:: bash $
+
+    SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all" pytest -v -k test_your_test_name
+
+`SKLEARN_SKIP_NETWORK_TESTS`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When this environment variable is set to a non zero value, the tests that need
+network access are skipped. When this environment variable is not set,
+network tests are skipped as well, since skipping them is the default.
+
+`SKLEARN_RUN_FLOAT32_TESTS`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When this environment variable is set to '1', the tests using the
+`global_dtype` fixture are also run on float32 data.
+When this environment variable is not set, the tests are only run on
+float64 data.
+
+`SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When this environment variable is set to a non zero value, the `Cython`
+directive `boundscheck` is set to `True`. This is useful for finding
+segfaults.
diff -pruN 0.23.2-5/doc/computing/scaling_strategies.rst 1.1.1-1/doc/computing/scaling_strategies.rst
--- 0.23.2-5/doc/computing/scaling_strategies.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/computing/scaling_strategies.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,140 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+.. _scaling_strategies:
+
+Strategies to scale computationally: bigger data
+=================================================
+
+For some applications the amount of examples, features (or both) and/or the
+speed at which they need to be processed are challenging for traditional
+approaches. In these cases scikit-learn has a number of options you can
+consider to make your system scale.
+
+Scaling with instances using out-of-core learning
+--------------------------------------------------
+
+Out-of-core (or "external memory") learning is a technique used to learn from
+data that cannot fit in a computer's main memory (RAM).
+
+Here is a sketch of a system designed to achieve this goal:
+
+  1. a way to stream instances
+  2. a way to extract features from instances
+  3. an incremental algorithm
+
+Streaming instances
+....................
+
+Basically, 1. may be a reader that yields instances from files on a
+hard drive, a database, from a network stream etc. However,
+details on how to achieve this are beyond the scope of this documentation.
+
+Extracting features
+...................
+
+\2. could be any relevant way to extract features among the
+different :ref:`feature extraction <feature_extraction>` methods supported by
+scikit-learn. However, when working with data that needs vectorization and
+where the set of features or values is not known in advance one should take
+explicit care. A good example is text classification where unknown terms are
+likely to be found during training. It is possible to use a stateful
+vectorizer if making multiple passes over the data is reasonable from an
+application point of view. Otherwise, one can turn up the difficulty by using
+a stateless feature extractor. Currently the preferred way to do this is to
+use the so-called :ref:`hashing trick<feature_hashing>` as implemented by
+:class:`sklearn.feature_extraction.FeatureHasher` for datasets with categorical
+variables represented as list of Python dicts or
+:class:`sklearn.feature_extraction.text.HashingVectorizer` for text documents.
+
+Incremental learning
+.....................
+
+Finally, for 3. we have a number of options inside scikit-learn. Although not
+all algorithms can learn incrementally (i.e. without seeing all the instances
+at once), all estimators implementing the ``partial_fit`` API are candidates.
+Actually, the ability to learn incrementally from a mini-batch of instances
+(sometimes called "online learning") is key to out-of-core learning as it
+guarantees that at any given time there will be only a small amount of
+instances in the main memory. Choosing a good size for the mini-batch that
+balances relevancy and memory footprint could involve some tuning [1]_.
+
+Here is a list of incremental estimators for different tasks:
+
+  - Classification
+      + :class:`sklearn.naive_bayes.MultinomialNB`
+      + :class:`sklearn.naive_bayes.BernoulliNB`
+      + :class:`sklearn.linear_model.Perceptron`
+      + :class:`sklearn.linear_model.SGDClassifier`
+      + :class:`sklearn.linear_model.PassiveAggressiveClassifier`
+      + :class:`sklearn.neural_network.MLPClassifier`
+  - Regression
+      + :class:`sklearn.linear_model.SGDRegressor`
+      + :class:`sklearn.linear_model.PassiveAggressiveRegressor`
+      + :class:`sklearn.neural_network.MLPRegressor`
+  - Clustering
+      + :class:`sklearn.cluster.MiniBatchKMeans`
+      + :class:`sklearn.cluster.Birch`
+  - Decomposition / feature extraction
+      + :class:`sklearn.decomposition.MiniBatchDictionaryLearning`
+      + :class:`sklearn.decomposition.IncrementalPCA`
+      + :class:`sklearn.decomposition.LatentDirichletAllocation`
+      + :class:`sklearn.decomposition.MiniBatchNMF`
+  - Preprocessing
+      + :class:`sklearn.preprocessing.StandardScaler`
+      + :class:`sklearn.preprocessing.MinMaxScaler`
+      + :class:`sklearn.preprocessing.MaxAbsScaler`
+
+For classification, a somewhat important thing to note is that although a
+stateless feature extraction routine may be able to cope with new/unseen
+attributes, the incremental learner itself may be unable to cope with
+new/unseen targets classes. In this case you have to pass all the possible
+classes to the first ``partial_fit`` call using the ``classes=`` parameter.
+
+Another aspect to consider when choosing a proper algorithm is that not all of
+them put the same importance on each example over time. Namely, the
+``Perceptron`` is still sensitive to badly labeled examples even after many
+examples whereas the ``SGD*`` and ``PassiveAggressive*`` families are more
+robust to this kind of artifacts. Conversely, the latter also tend to give less
+importance to remarkably different, yet properly labeled examples when they
+come late in the stream as their learning rate decreases over time.
+
+Examples
+..........
+
+Finally, we have a full-fledged example of
+:ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. It is aimed at
+providing a starting point for people wanting to build out-of-core learning
+systems and demonstrates most of the notions discussed above.
+
+Furthermore, it also shows the evolution of the performance of different
+algorithms with the number of processed examples.
+
+.. |accuracy_over_time| image::  ../auto_examples/applications/images/sphx_glr_plot_out_of_core_classification_001.png
+    :target: ../auto_examples/applications/plot_out_of_core_classification.html
+    :scale: 80
+
+.. centered:: |accuracy_over_time|
+
+Now looking at the computation time of the different parts, we see that the
+vectorization is much more expensive than learning itself. From the different
+algorithms, ``MultinomialNB`` is the most expensive, but its overhead can be
+mitigated by increasing the size of the mini-batches (exercise: change
+``minibatch_size`` to 100 and 10000 in the program and compare).
+
+.. |computation_time| image::  ../auto_examples/applications/images/sphx_glr_plot_out_of_core_classification_003.png
+    :target: ../auto_examples/applications/plot_out_of_core_classification.html
+    :scale: 80
+
+.. centered:: |computation_time|
+
+
+Notes
+......
+
+.. [1] Depending on the algorithm the mini-batch size can influence results or
+       not. SGD*, PassiveAggressive*, and discrete NaiveBayes are truly online
+       and are not affected by batch size. Conversely, MiniBatchKMeans
+       convergence rate is affected by the batch size. Also, its memory
+       footprint can vary dramatically with batch size.
diff -pruN 0.23.2-5/doc/computing.rst 1.1.1-1/doc/computing.rst
--- 0.23.2-5/doc/computing.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/computing.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,16 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+============================
+Computing with scikit-learn
+============================
+
+.. include:: includes/big_toc_css.rst
+
+.. toctree::
+    :maxdepth: 2
+
+    computing/scaling_strategies
+    computing/computational_performance
+    computing/parallelism
diff -pruN 0.23.2-5/doc/conf.py 1.1.1-1/doc/conf.py
--- 0.23.2-5/doc/conf.py	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/conf.py	2022-05-19 12:16:26.432781500 +0000
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
 # scikit-learn documentation build configuration file, created by
 # sphinx-quickstart on Fri Jan  8 09:13:42 2010.
 #
@@ -16,32 +14,54 @@ import sys
 import os
 import warnings
 import re
+from datetime import datetime
 from packaging.version import parse
 from pathlib import Path
+from io import StringIO
 
 # If extensions (or modules to document with autodoc) are in another
 # directory, add these directories to sys.path here. If the directory
 # is relative to the documentation root, use os.path.abspath to make it
 # absolute, like shown here.
-sys.path.insert(0, os.path.abspath('sphinxext'))
+sys.path.insert(0, os.path.abspath("sphinxext"))
 
 from github_link import make_linkcode_resolve
 import sphinx_gallery
+from sphinx_gallery.sorting import ExampleTitleSortKey
 
 # -- General configuration ---------------------------------------------------
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = [
-    'sphinx.ext.autodoc', 'sphinx.ext.autosummary',
-    'numpydoc',
-    'sphinx.ext.linkcode', 'sphinx.ext.doctest',
-    'sphinx.ext.intersphinx',
-    'sphinx.ext.imgconverter',
-    'sphinx_gallery.gen_gallery',
-    'sphinx_issues'
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "numpydoc",
+    "sphinx.ext.linkcode",
+    "sphinx.ext.doctest",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.imgconverter",
+    "sphinx_gallery.gen_gallery",
+    "sphinx_issues",
+    "add_toctree_functions",
+    "sphinx-prompt",
+    "sphinxext.opengraph",
+    "doi_role",
+    "allow_nan_estimators",
+    "matplotlib.sphinxext.plot_directive",
 ]
 
+# Produce `plot::` directives for examples that contain `import matplotlib` or
+# `from matplotlib import`.
+numpydoc_use_plots = True
+
+# Options for the `::plot` directive:
+# https://matplotlib.org/stable/api/sphinxext_plot_directive_api.html
+plot_formats = ["png"]
+plot_include_source = True
+plot_html_show_formats = False
+plot_html_show_source_link = False
+
 # this is needed for some reason...
 # see https://github.com/numpy/numpydoc/issues/69
 numpydoc_class_members_toctree = False
@@ -49,38 +69,34 @@ numpydoc_class_members_toctree = False
 
 # For maths, use mathjax by default and svg if NO_MATHJAX env variable is set
 # (useful for viewing the doc offline)
-if os.environ.get('NO_MATHJAX'):
-    extensions.append('sphinx.ext.imgmath')
-    imgmath_image_format = 'svg'
-    mathjax_path = ''
+if os.environ.get("NO_MATHJAX"):
+    extensions.append("sphinx.ext.imgmath")
+    imgmath_image_format = "svg"
+    mathjax_path = ""
 else:
-    extensions.append('sphinx.ext.mathjax')
-    mathjax_path = ('https://cdn.jsdelivr.net/npm/mathjax@3/es5/'
-                    'tex-chtml.js')
-
-autodoc_default_options = {
-    'members': True,
-    'inherited-members': True
-}
+    extensions.append("sphinx.ext.mathjax")
+    mathjax_path = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"
+
+autodoc_default_options = {"members": True, "inherited-members": True}
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['templates']
+templates_path = ["templates"]
 
 # generate autosummary even if no references
 autosummary_generate = True
 
 # The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = ".rst"
 
 # The encoding of source files.
-#source_encoding = 'utf-8'
+# source_encoding = 'utf-8'
 
-# The master toctree document.
-master_doc = 'contents'
+# The main toctree document.
+root_doc = "contents"
 
 # General information about the project.
-project = 'scikit-learn'
-copyright = '2007 - 2020, scikit-learn developers (BSD License)'
+project = "scikit-learn"
+copyright = f"2007 - {datetime.now().year}, scikit-learn developers (BSD License)"
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -88,6 +104,7 @@ copyright = '2007 - 2020, scikit-learn d
 #
 # The short X.Y version.
 import sklearn
+
 parsed_version = parse(sklearn.__version__)
 version = ".".join(parsed_version.base_version.split(".")[:2])
 # The full version, including alpha/beta/rc tags.
@@ -99,89 +116,90 @@ else:
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
-#language = None
+# language = None
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
-#today = ''
+# today = ''
 # Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['_build', 'templates', 'includes', 'themes']
+exclude_patterns = ["_build", "templates", "includes", "themes"]
 
 # The reST default role (used for this markup: `text`) to use for all
 # documents.
-default_role = 'literal'
+default_role = "literal"
 
 # If true, '()' will be appended to :func: etc. cross-reference text.
 add_function_parentheses = False
 
 # If true, the current module name will be prepended to all description
 # unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
 
 # If true, sectionauthor and moduleauthor directives will be shown in the
 # output. They are ignored by default.
-#show_authors = False
+# show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
 
 # A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
 
 
 # -- Options for HTML output -------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  Major themes that come with
 # Sphinx are currently 'default' and 'sphinxdoc'.
-html_theme = 'scikit-learn-modern'
+html_theme = "scikit-learn-modern"
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
-html_theme_options = {'google_analytics': True,
-                      'mathjax_path': mathjax_path}
+html_theme_options = {
+    "google_analytics": True,
+    "mathjax_path": mathjax_path,
+    "link_to_live_contributing_page": not parsed_version.is_devrelease,
+}
 
 # Add any paths that contain custom themes here, relative to this directory.
-html_theme_path = ['themes']
+html_theme_path = ["themes"]
 
 
 # The name for this set of Sphinx documents.  If None, it defaults to
 # "<project> v<release> documentation".
-#html_title = None
+# html_title = None
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
-html_short_title = 'scikit-learn'
+html_short_title = "scikit-learn"
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-html_logo = 'logos/scikit-learn-logo-small.png'
+html_logo = "logos/scikit-learn-logo-small.png"
 
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-html_favicon = 'logos/favicon.ico'
+html_favicon = "logos/favicon.ico"
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['images']
+html_static_path = ["images"]
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
 
 # Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
-html_additional_pages = {
-    'index': 'index.html',
-    'documentation': 'documentation.html'}  # redirects to index
+html_additional_pages = {"index": "index.html"}
 
 # If false, no module index is generated.
 html_domain_indices = False
@@ -190,21 +208,21 @@ html_domain_indices = False
 html_use_index = False
 
 # If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
 
 # If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
 
 # If true, an OpenSearch description file will be output, and all pages will
 # contain a <link> tag referring to it.  The value of this option must be the
 # base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
 
 # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = ''
+# html_file_suffix = ''
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'scikit-learndoc'
+htmlhelp_basename = "scikit-learndoc"
 
 # If true, the reST sources are included in the HTML build as _sources/name.
 html_copy_source = True
@@ -215,37 +233,63 @@ html_context = {}
 # index.html
 release_highlights_dir = Path("..") / "examples" / "release_highlights"
 # Finds the highlight with the latest version number
-latest_highlights = sorted(release_highlights_dir.glob(
-                           "plot_release_highlights_*.py"))[-1]
-latest_highlights = latest_highlights.with_suffix('').name
-html_context["release_highlights"] = \
-    f"auto_examples/release_highlights/{latest_highlights}"
+latest_highlights = sorted(release_highlights_dir.glob("plot_release_highlights_*.py"))[
+    -1
+]
+latest_highlights = latest_highlights.with_suffix("").name
+html_context[
+    "release_highlights"
+] = f"auto_examples/release_highlights/{latest_highlights}"
 
-# get version from higlight name assuming highlights have the form
+# get version from highlight name assuming highlights have the form
 # plot_release_highlights_0_22_0
 highlight_version = ".".join(latest_highlights.split("_")[-3:-1])
 html_context["release_highlights_version"] = highlight_version
 
+
+# redirects dictionary maps from old links to new links
+redirects = {
+    "documentation": "index",
+    "auto_examples/feature_selection/plot_permutation_test_for_classification": (
+        "auto_examples/model_selection/plot_permutation_tests_for_classification"
+    ),
+    "modules/model_persistence": "model_persistence",
+    "auto_examples/linear_model/plot_bayesian_ridge": (
+        "auto_examples/linear_model/plot_ard"
+    ),
+}
+html_context["redirects"] = redirects
+for old_link in redirects:
+    html_additional_pages[old_link] = "redirects.html"
+
+
 # -- Options for LaTeX output ------------------------------------------------
 latex_elements = {
     # The paper size ('letterpaper' or 'a4paper').
     # 'papersize': 'letterpaper',
-
     # The font size ('10pt', '11pt' or '12pt').
     # 'pointsize': '10pt',
-
     # Additional stuff for the LaTeX preamble.
-    'preamble': r"""
+    "preamble": r"""
         \usepackage{amsmath}\usepackage{amsfonts}\usepackage{bm}
         \usepackage{morefloats}\usepackage{enumitem} \setlistdepth{10}
+        \let\oldhref\href
+        \renewcommand{\href}[2]{\oldhref{#1}{\hbox{#2}}}
         """
 }
 
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass
 # [howto/manual]).
-latex_documents = [('contents', 'user_guide.tex', 'scikit-learn user guide',
-                    'scikit-learn developers', 'manual'), ]
+latex_documents = [
+    (
+        "contents",
+        "user_guide.tex",
+        "scikit-learn user guide",
+        "scikit-learn developers",
+        "manual",
+    ),
+]
 
 # The name of an image file (relative to this directory) to place at the top of
 # the title page.
@@ -261,27 +305,26 @@ trim_doctests_flags = True
 
 # intersphinx configuration
 intersphinx_mapping = {
-    'python': ('https://docs.python.org/{.major}'.format(
-        sys.version_info), None),
-    'numpy': ('https://docs.scipy.org/doc/numpy/', None),
-    'scipy': ('https://docs.scipy.org/doc/scipy/reference', None),
-    'matplotlib': ('https://matplotlib.org/', None),
-    'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
-    'joblib': ('https://joblib.readthedocs.io/en/latest/', None),
-    'seaborn': ('https://seaborn.pydata.org/', None),
+    "python": ("https://docs.python.org/{.major}".format(sys.version_info), None),
+    "numpy": ("https://numpy.org/doc/stable", None),
+    "scipy": ("https://docs.scipy.org/doc/scipy/", None),
+    "matplotlib": ("https://matplotlib.org/", None),
+    "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
+    "joblib": ("https://joblib.readthedocs.io/en/latest/", None),
+    "seaborn": ("https://seaborn.pydata.org/", None),
 }
 
 v = parse(release)
 if v.release is None:
     raise ValueError(
-        'Ill-formed version: {!r}. Version should follow '
-        'PEP440'.format(version))
+        "Ill-formed version: {!r}. Version should follow PEP440".format(version)
+    )
 
 if v.is_devrelease:
-    binder_branch = 'master'
+    binder_branch = "main"
 else:
     major, minor = v.release[:2]
-    binder_branch = '{}.{}.X'.format(major, minor)
+    binder_branch = "{}.{}.X".format(major, minor)
 
 
 class SubSectionTitleOrder:
@@ -290,12 +333,13 @@ class SubSectionTitleOrder:
     Assumes README.txt exists for all subsections and uses the subsection with
     dashes, '---', as the adornment.
     """
+
     def __init__(self, src_dir):
         self.src_dir = src_dir
         self.regex = re.compile(r"^([\w ]+)\n-", re.MULTILINE)
 
     def __repr__(self):
-        return '<%s>' % (self.__class__.__name__,)
+        return "<%s>" % (self.__class__.__name__,)
 
     def __call__(self, directory):
         src_path = os.path.normpath(os.path.join(self.src_dir, directory))
@@ -307,7 +351,7 @@ class SubSectionTitleOrder:
         readme = os.path.join(src_path, "README.txt")
 
         try:
-            with open(readme, 'r') as f:
+            with open(readme, "r") as f:
                 content = f.read()
         except FileNotFoundError:
             return directory
@@ -318,26 +362,45 @@ class SubSectionTitleOrder:
         return directory
 
 
+class SKExampleTitleSortKey(ExampleTitleSortKey):
+    """Sorts release highlights based on version number."""
+
+    def __call__(self, filename):
+        title = super().__call__(filename)
+        prefix = "plot_release_highlights_"
+
+        # Use title to sort if not a release highlight
+        if not filename.startswith(prefix):
+            return title
+
+        major_minor = filename[len(prefix) :].split("_")[:2]
+        version_float = float(".".join(major_minor))
+
+        # negate to place the newest version highlights first
+        return -version_float
+
+
 sphinx_gallery_conf = {
-    'doc_module': 'sklearn',
-    'backreferences_dir': os.path.join('modules', 'generated'),
-    'show_memory': False,
-    'reference_url': {
-        'sklearn': None},
-    'examples_dirs': ['../examples'],
-    'gallery_dirs': ['auto_examples'],
-    'subsection_order': SubSectionTitleOrder('../examples'),
-    'binder': {
-        'org': 'scikit-learn',
-        'repo': 'scikit-learn',
-        'binderhub_url': 'https://mybinder.org',
-        'branch': binder_branch,
-        'dependencies': './binder/requirements.txt',
-        'use_jupyter_lab': True
+    "doc_module": "sklearn",
+    "backreferences_dir": os.path.join("modules", "generated"),
+    "show_memory": False,
+    "reference_url": {"sklearn": None},
+    "examples_dirs": ["../examples"],
+    "gallery_dirs": ["auto_examples"],
+    "subsection_order": SubSectionTitleOrder("../examples"),
+    "within_subsection_order": SKExampleTitleSortKey,
+    "binder": {
+        "org": "scikit-learn",
+        "repo": "scikit-learn",
+        "binderhub_url": "https://mybinder.org",
+        "branch": binder_branch,
+        "dependencies": "./binder/requirements.txt",
+        "use_jupyter_lab": True,
     },
     # avoid generating too many cross links
-    'inspect_global_variables': False,
-    'remove_config_comments': True,
+    "inspect_global_variables": False,
+    "remove_config_comments": True,
+    "plot_gallery": "True",
 }
 
 
@@ -345,26 +408,26 @@ sphinx_gallery_conf = {
 # thumbnails for the front page of the scikit-learn home page.
 # key: first image in set
 # values: (number of plot in set, height of thumbnail)
-carousel_thumbs = {'sphx_glr_plot_classifier_comparison_001.png': 600}
+carousel_thumbs = {"sphx_glr_plot_classifier_comparison_001.png": 600}
 
 
 # enable experimental module so that experimental estimators can be
 # discovered properly by sphinx
-from sklearn.experimental import enable_hist_gradient_boosting  # noqa
 from sklearn.experimental import enable_iterative_imputer  # noqa
+from sklearn.experimental import enable_halving_search_cv  # noqa
 
 
 def make_carousel_thumbs(app, exception):
     """produces the final resized carousel images"""
     if exception is not None:
         return
-    print('Preparing carousel images')
+    print("Preparing carousel images")
 
-    image_dir = os.path.join(app.builder.outdir, '_images')
+    image_dir = os.path.join(app.builder.outdir, "_images")
     for glr_plot, max_width in carousel_thumbs.items():
         image = os.path.join(image_dir, glr_plot)
         if os.path.exists(image):
-            c_thumb = os.path.join(image_dir, glr_plot[:-4] + '_carousel.png')
+            c_thumb = os.path.join(image_dir, glr_plot[:-4] + "_carousel.png")
             sphinx_gallery.gen_rst.scale_image(image, c_thumb, max_width, 190)
 
 
@@ -373,40 +436,133 @@ def filter_search_index(app, exception):
         return
 
     # searchindex only exist when generating html
-    if app.builder.name != 'html':
+    if app.builder.name != "html":
         return
 
-    print('Removing methods from search index')
+    print("Removing methods from search index")
 
-    searchindex_path = os.path.join(app.builder.outdir, 'searchindex.js')
-    with open(searchindex_path, 'r') as f:
+    searchindex_path = os.path.join(app.builder.outdir, "searchindex.js")
+    with open(searchindex_path, "r") as f:
         searchindex_text = f.read()
 
-    searchindex_text = re.sub(r'{__init__.+?}', '{}', searchindex_text)
-    searchindex_text = re.sub(r'{__call__.+?}', '{}', searchindex_text)
+    searchindex_text = re.sub(r"{__init__.+?}", "{}", searchindex_text)
+    searchindex_text = re.sub(r"{__call__.+?}", "{}", searchindex_text)
 
-    with open(searchindex_path, 'w') as f:
+    with open(searchindex_path, "w") as f:
         f.write(searchindex_text)
 
 
+def generate_min_dependency_table(app):
+    """Generate min dependency table for docs."""
+    from sklearn._min_dependencies import dependent_packages
+
+    # get length of header
+    package_header_len = max(len(package) for package in dependent_packages) + 4
+    version_header_len = len("Minimum Version") + 4
+    tags_header_len = max(len(tags) for _, tags in dependent_packages.values()) + 4
+
+    output = StringIO()
+    output.write(
+        " ".join(
+            ["=" * package_header_len, "=" * version_header_len, "=" * tags_header_len]
+        )
+    )
+    output.write("\n")
+    dependency_title = "Dependency"
+    version_title = "Minimum Version"
+    tags_title = "Purpose"
+
+    output.write(
+        f"{dependency_title:<{package_header_len}} "
+        f"{version_title:<{version_header_len}} "
+        f"{tags_title}\n"
+    )
+
+    output.write(
+        " ".join(
+            ["=" * package_header_len, "=" * version_header_len, "=" * tags_header_len]
+        )
+    )
+    output.write("\n")
+
+    for package, (version, tags) in dependent_packages.items():
+        output.write(
+            f"{package:<{package_header_len}} {version:<{version_header_len}} {tags}\n"
+        )
+
+    output.write(
+        " ".join(
+            ["=" * package_header_len, "=" * version_header_len, "=" * tags_header_len]
+        )
+    )
+    output.write("\n")
+    output = output.getvalue()
+
+    with (Path(".") / "min_dependency_table.rst").open("w") as f:
+        f.write(output)
+
+
+def generate_min_dependency_substitutions(app):
+    """Generate min dependency substitutions for docs."""
+    from sklearn._min_dependencies import dependent_packages
+
+    output = StringIO()
+
+    for package, (version, _) in dependent_packages.items():
+        package = package.capitalize()
+        output.write(f".. |{package}MinVersion| replace:: {version}")
+        output.write("\n")
+
+    output = output.getvalue()
+
+    with (Path(".") / "min_dependency_substitutions.rst").open("w") as f:
+        f.write(output)
+
+
 # Config for sphinx_issues
 
 # we use the issues path for PRs since the issues URL will forward
-issues_github_path = 'scikit-learn/scikit-learn'
+issues_github_path = "scikit-learn/scikit-learn"
 
 
 def setup(app):
+    app.connect("builder-inited", generate_min_dependency_table)
+    app.connect("builder-inited", generate_min_dependency_substitutions)
     # to hide/show the prompt in code examples:
-    app.connect('build-finished', make_carousel_thumbs)
-    app.connect('build-finished', filter_search_index)
+    app.connect("build-finished", make_carousel_thumbs)
+    app.connect("build-finished", filter_search_index)
 
 
 # The following is used by sphinx.ext.linkcode to provide links to github
-linkcode_resolve = make_linkcode_resolve('sklearn',
-                                         'https://github.com/scikit-learn/'
-                                         'scikit-learn/blob/{revision}/'
-                                         '{package}/{path}#L{lineno}')
-
-warnings.filterwarnings("ignore", category=UserWarning,
-                        message='Matplotlib is currently using agg, which is a'
-                                ' non-GUI backend, so cannot show the figure.')
+linkcode_resolve = make_linkcode_resolve(
+    "sklearn",
+    "https://github.com/scikit-learn/"
+    "scikit-learn/blob/{revision}/"
+    "{package}/{path}#L{lineno}",
+)
+
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message=(
+        "Matplotlib is currently using agg, which is a"
+        " non-GUI backend, so cannot show the figure."
+    ),
+)
+
+
+# Maps functions whose names are indistinguishable from a class name when
+# case is ignored to a different generated filename.
+autosummary_filename_map = {
+    "sklearn.cluster.dbscan": "dbscan-function",
+    "sklearn.covariance.oas": "oas-function",
+    "sklearn.decomposition.fastica": "fastica-function",
+}
+
+
+# Config for sphinxext.opengraph
+
+ogp_site_url = "https://scikit-learn.org/stable/"
+ogp_image = "https://scikit-learn.org/stable/_static/scikit-learn-logo-small.png"
+ogp_use_first_image = True
+ogp_site_name = "scikit-learn"
diff -pruN 0.23.2-5/doc/conftest.py 1.1.1-1/doc/conftest.py
--- 0.23.2-5/doc/conftest.py	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/conftest.py	2022-05-19 12:16:26.432781500 +0000
@@ -1,13 +1,13 @@
 import os
 from os.path import exists
 from os.path import join
+from os import environ
 import warnings
 
-import numpy as np
-
 from sklearn.utils import IS_PYPY
 from sklearn.utils._testing import SkipTest
 from sklearn.utils._testing import check_skip_network
+from sklearn.utils.fixes import parse_version
 from sklearn.datasets import get_data_home
 from sklearn.datasets._base import _pkl_filepath
 from sklearn.datasets._twenty_newsgroups import CACHE_NAME
@@ -15,7 +15,7 @@ from sklearn.datasets._twenty_newsgroups
 
 def setup_labeled_faces():
     data_home = get_data_home()
-    if not exists(join(data_home, 'lfw_home')):
+    if not exists(join(data_home, "lfw_home")):
         raise SkipTest("Skipping dataset loading doctests")
 
 
@@ -28,21 +28,35 @@ def setup_rcv1():
 
 
 def setup_twenty_newsgroups():
-    data_home = get_data_home()
     cache_path = _pkl_filepath(get_data_home(), CACHE_NAME)
     if not exists(cache_path):
         raise SkipTest("Skipping dataset loading doctests")
 
 
 def setup_working_with_text_data():
-    if IS_PYPY and os.environ.get('CI', None):
-        raise SkipTest('Skipping too slow test with PyPy on CI')
+    if IS_PYPY and os.environ.get("CI", None):
+        raise SkipTest("Skipping too slow test with PyPy on CI")
     check_skip_network()
     cache_path = _pkl_filepath(get_data_home(), CACHE_NAME)
     if not exists(cache_path):
         raise SkipTest("Skipping dataset loading doctests")
 
 
+def setup_loading_other_datasets():
+    try:
+        import pandas  # noqa
+    except ImportError:
+        raise SkipTest("Skipping loading_other_datasets.rst, pandas not installed")
+
+    # checks SKLEARN_SKIP_NETWORK_TESTS to see if test should run
+    run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0"
+    if not run_network_tests:
+        raise SkipTest(
+            "Skipping loading_other_datasets.rst, tests can be "
+            "enabled by setting SKLEARN_SKIP_NETWORK_TESTS=0"
+        )
+
+
 def setup_compose():
     try:
         import pandas  # noqa
@@ -57,34 +71,88 @@ def setup_impute():
         raise SkipTest("Skipping impute.rst, pandas not installed")
 
 
+def setup_grid_search():
+    try:
+        import pandas  # noqa
+    except ImportError:
+        raise SkipTest("Skipping grid_search.rst, pandas not installed")
+
+
+def setup_preprocessing():
+    try:
+        import pandas  # noqa
+
+        if parse_version(pandas.__version__) < parse_version("1.1.0"):
+            raise SkipTest("Skipping preprocessing.rst, pandas version < 1.1.0")
+    except ImportError:
+        raise SkipTest("Skipping preprocessing.rst, pandas not installed")
+
+
 def setup_unsupervised_learning():
     try:
         import skimage  # noqa
     except ImportError:
-        raise SkipTest("Skipping unsupervised_learning.rst, scikit-image "
-                       "not installed")
+        raise SkipTest("Skipping unsupervised_learning.rst, scikit-image not installed")
     # ignore deprecation warnings from scipy.misc.face
-    warnings.filterwarnings('ignore', 'The binary mode of fromstring',
-                            DeprecationWarning)
+    warnings.filterwarnings(
+        "ignore", "The binary mode of fromstring", DeprecationWarning
+    )
+
+
+def skip_if_matplotlib_not_installed(fname):
+    try:
+        import matplotlib  # noqa
+    except ImportError:
+        basename = os.path.basename(fname)
+        raise SkipTest(f"Skipping doctests for {basename}, matplotlib not installed")
 
 
 def pytest_runtest_setup(item):
     fname = item.fspath.strpath
-    is_index = fname.endswith('datasets/index.rst')
-    if fname.endswith('datasets/labeled_faces.rst') or is_index:
+    # normalize filename to use forward slashes on Windows for easier handling
+    # later
+    fname = fname.replace(os.sep, "/")
+
+    is_index = fname.endswith("datasets/index.rst")
+    if fname.endswith("datasets/labeled_faces.rst") or is_index:
         setup_labeled_faces()
-    elif fname.endswith('datasets/rcv1.rst') or is_index:
+    elif fname.endswith("datasets/rcv1.rst") or is_index:
         setup_rcv1()
-    elif fname.endswith('datasets/twenty_newsgroups.rst') or is_index:
+    elif fname.endswith("datasets/twenty_newsgroups.rst") or is_index:
         setup_twenty_newsgroups()
-    elif fname.endswith('tutorial/text_analytics/working_with_text_data.rst')\
-            or is_index:
+    elif (
+        fname.endswith("tutorial/text_analytics/working_with_text_data.rst") or is_index
+    ):
         setup_working_with_text_data()
-    elif fname.endswith('modules/compose.rst') or is_index:
+    elif fname.endswith("modules/compose.rst") or is_index:
         setup_compose()
-    elif IS_PYPY and fname.endswith('modules/feature_extraction.rst'):
-        raise SkipTest('FeatureHasher is not compatible with PyPy')
-    elif fname.endswith('modules/impute.rst'):
+    elif fname.endswith("datasets/loading_other_datasets.rst"):
+        setup_loading_other_datasets()
+    elif fname.endswith("modules/impute.rst"):
         setup_impute()
-    elif fname.endswith('statistical_inference/unsupervised_learning.rst'):
+    elif fname.endswith("modules/grid_search.rst"):
+        setup_grid_search()
+    elif fname.endswith("modules/preprocessing.rst"):
+        setup_preprocessing()
+    elif fname.endswith("statistical_inference/unsupervised_learning.rst"):
         setup_unsupervised_learning()
+
+    rst_files_requiring_matplotlib = [
+        "modules/partial_dependence.rst",
+        "modules/tree.rst",
+        "tutorial/statistical_inference/settings.rst",
+        "tutorial/statistical_inference/supervised_learning.rst",
+    ]
+    for each in rst_files_requiring_matplotlib:
+        if fname.endswith(each):
+            skip_if_matplotlib_not_installed(fname)
+
+
+def pytest_configure(config):
+    # Use matplotlib agg backend during the tests including doctests
+    try:
+        import matplotlib
+
+        matplotlib.use("agg")
+    except ImportError:
+        pass
diff -pruN 0.23.2-5/doc/contributor_experience_team.rst 1.1.1-1/doc/contributor_experience_team.rst
--- 0.23.2-5/doc/contributor_experience_team.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/contributor_experience_team.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,40 @@
+.. raw :: html
+
+    <!-- Generated by generate_authors_table.py -->
+    <div class="sk-authors-container">
+    <style>
+      img.avatar {border-radius: 10px;}
+    </style>
+    <div>
+    <a href='https://github.com/alfaro96'><img src='https://avatars.githubusercontent.com/u/32649176?v=4' class='avatar' /></a> <br />
+    <p>Juan Carlos Alfaro Jiménez</p>
+    </div>
+    <div>
+    <a href='https://github.com/lucyleeow'><img src='https://avatars.githubusercontent.com/u/23182829?v=4' class='avatar' /></a> <br />
+    <p>Lucy Liu</p>
+    </div>
+    <div>
+    <a href='https://github.com/jmloyola'><img src='https://avatars.githubusercontent.com/u/2133361?v=4' class='avatar' /></a> <br />
+    <p>Juan Martin Loyola</p>
+    </div>
+    <div>
+    <a href='https://github.com/smarie'><img src='https://avatars.githubusercontent.com/u/3236794?v=4' class='avatar' /></a> <br />
+    <p>Sylvain Marié</p>
+    </div>
+    <div>
+    <a href='https://github.com/cmarmo'><img src='https://avatars.githubusercontent.com/u/1662261?v=4' class='avatar' /></a> <br />
+    <p>Chiara Marmo</p>
+    </div>
+    <div>
+    <a href='https://github.com/norbusan'><img src='https://avatars.githubusercontent.com/u/1735589?v=4' class='avatar' /></a> <br />
+    <p>Norbert Preining</p>
+    </div>
+    <div>
+    <a href='https://github.com/reshamas'><img src='https://avatars.githubusercontent.com/u/2507232?v=4' class='avatar' /></a> <br />
+    <p>Reshama Shaikh</p>
+    </div>
+    <div>
+    <a href='https://github.com/albertcthomas'><img src='https://avatars.githubusercontent.com/u/15966638?v=4' class='avatar' /></a> <br />
+    <p>Albert Thomas</p>
+    </div>
+    </div>
\ No newline at end of file
diff -pruN 0.23.2-5/doc/datasets/index.rst 1.1.1-1/doc/datasets/index.rst
--- 0.23.2-5/doc/datasets/index.rst	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/datasets/index.rst	1970-01-01 00:00:00.000000000 +0000
@@ -1,529 +0,0 @@
-.. _datasets:
-
-=========================
-Dataset loading utilities
-=========================
-
-.. currentmodule:: sklearn.datasets
-
-The ``sklearn.datasets`` package embeds some small toy datasets
-as introduced in the :ref:`Getting Started <loading_example_dataset>` section.
-
-This package also features helpers to fetch larger datasets commonly
-used by the machine learning community to benchmark algorithms on data
-that comes from the 'real world'.
-
-To evaluate the impact of the scale of the dataset (``n_samples`` and
-``n_features``) while controlling the statistical properties of the data
-(typically the correlation and informativeness of the features), it is
-also possible to generate synthetic data.
-
-General dataset API
-===================
-
-There are three main kinds of dataset interfaces that can be used to get
-datasets depending on the desired type of dataset.
-
-**The dataset loaders.** They can be used to load small standard datasets,
-described in the :ref:`toy_datasets` section.
-
-**The dataset fetchers.** They can be used to download and load larger datasets,
-described in the :ref:`real_world_datasets` section.
-
-Both loaders and fetchers functions return a :class:`sklearn.utils.Bunch`
-object holding at least two items:
-an array of shape ``n_samples`` * ``n_features`` with
-key ``data`` (except for 20newsgroups) and a numpy array of
-length ``n_samples``, containing the target values, with key ``target``.
-
-The Bunch object is a dictionary that exposes its keys are attributes.
-For more information about Bunch object, see :class:`sklearn.utils.Bunch`:
-
-It's also possible for almost all of these function to constrain the output
-to be a tuple containing only the data and the target, by setting the
-``return_X_y`` parameter to ``True``.
-
-The datasets also contain a full description in their ``DESCR`` attribute and
-some contain ``feature_names`` and ``target_names``. See the dataset
-descriptions below for details.
-
-**The dataset generation functions.** They can be used to generate controlled
-synthetic datasets, described in the :ref:`sample_generators` section.
-
-These functions return a tuple ``(X, y)`` consisting of a ``n_samples`` *
-``n_features`` numpy array ``X`` and an array of length ``n_samples``
-containing the targets ``y``.
-
-In addition, there are also miscellaneous tools to load datasets of other
-formats or from other locations, described in the :ref:`loading_other_datasets`
-section.
-
-.. _toy_datasets:
-
-Toy datasets
-============
-
-scikit-learn comes with a few small standard datasets that do not require to
-download any file from some external website.
-
-They can be loaded using the following functions:
-
-.. autosummary::
-
-   :toctree: ../modules/generated/
-   :template: function.rst
-
-   load_boston
-   load_iris
-   load_diabetes
-   load_digits
-   load_linnerud
-   load_wine
-   load_breast_cancer
-
-These datasets are useful to quickly illustrate the behavior of the
-various algorithms implemented in scikit-learn. They are however often too
-small to be representative of real world machine learning tasks.
-
-.. include:: ../../sklearn/datasets/descr/boston_house_prices.rst
-
-.. include:: ../../sklearn/datasets/descr/iris.rst
-
-.. include:: ../../sklearn/datasets/descr/diabetes.rst
-
-.. include:: ../../sklearn/datasets/descr/digits.rst
-
-.. include:: ../../sklearn/datasets/descr/linnerud.rst
-
-.. include:: ../../sklearn/datasets/descr/wine_data.rst
-
-.. include:: ../../sklearn/datasets/descr/breast_cancer.rst
-
-.. _real_world_datasets:
-
-Real world datasets
-===================
-
-scikit-learn provides tools to load larger datasets, downloading them if
-necessary.
-
-They can be loaded using the following functions:
-
-.. autosummary::
-
-   :toctree: ../modules/generated/
-   :template: function.rst
-
-   fetch_olivetti_faces
-   fetch_20newsgroups
-   fetch_20newsgroups_vectorized
-   fetch_lfw_people
-   fetch_lfw_pairs
-   fetch_covtype
-   fetch_rcv1
-   fetch_kddcup99
-   fetch_california_housing
-
-.. include:: ../../sklearn/datasets/descr/olivetti_faces.rst
-
-.. include:: ../../sklearn/datasets/descr/twenty_newsgroups.rst
-
-.. include:: ../../sklearn/datasets/descr/lfw.rst
-
-.. include:: ../../sklearn/datasets/descr/covtype.rst
-
-.. include:: ../../sklearn/datasets/descr/rcv1.rst
-
-.. include:: ../../sklearn/datasets/descr/kddcup99.rst
-
-.. include:: ../../sklearn/datasets/descr/california_housing.rst
-
-.. _sample_generators:
-
-Generated datasets
-==================
-
-In addition, scikit-learn includes various random sample generators that
-can be used to build artificial datasets of controlled size and complexity.
-
-Generators for classification and clustering
---------------------------------------------
-
-These generators produce a matrix of features and corresponding discrete
-targets.
-
-Single label
-~~~~~~~~~~~~
-
-Both :func:`make_blobs` and :func:`make_classification` create multiclass
-datasets by allocating each class one or more normally-distributed clusters of
-points.  :func:`make_blobs` provides greater control regarding the centers and
-standard deviations of each cluster, and is used to demonstrate clustering.
-:func:`make_classification` specialises in introducing noise by way of:
-correlated, redundant and uninformative features; multiple Gaussian clusters
-per class; and linear transformations of the feature space.
-
-:func:`make_gaussian_quantiles` divides a single Gaussian cluster into
-near-equal-size classes separated by concentric hyperspheres.
-:func:`make_hastie_10_2` generates a similar binary, 10-dimensional problem.
-
-.. image:: ../auto_examples/datasets/images/sphx_glr_plot_random_dataset_001.png
-   :target: ../auto_examples/datasets/plot_random_dataset.html
-   :scale: 50
-   :align: center
-
-:func:`make_circles` and :func:`make_moons` generate 2d binary classification
-datasets that are challenging to certain algorithms (e.g. centroid-based
-clustering or linear classification), including optional Gaussian noise.
-They are useful for visualisation. :func:`make_circles` produces Gaussian data
-with a spherical decision boundary for binary classification, while
-:func:`make_moons` produces two interleaving half circles.
-
-Multilabel
-~~~~~~~~~~
-
-:func:`make_multilabel_classification` generates random samples with multiple
-labels, reflecting a bag of words drawn from a mixture of topics. The number of
-topics for each document is drawn from a Poisson distribution, and the topics
-themselves are drawn from a fixed random distribution. Similarly, the number of
-words is drawn from Poisson, with words drawn from a multinomial, where each
-topic defines a probability distribution over words. Simplifications with
-respect to true bag-of-words mixtures include:
-
-* Per-topic word distributions are independently drawn, where in reality all
-  would be affected by a sparse base distribution, and would be correlated.
-* For a document generated from multiple topics, all topics are weighted
-  equally in generating its bag of words.
-* Documents without labels words at random, rather than from a base
-  distribution.
-
-.. image:: ../auto_examples/datasets/images/sphx_glr_plot_random_multilabel_dataset_001.png
-   :target: ../auto_examples/datasets/plot_random_multilabel_dataset.html
-   :scale: 50
-   :align: center
-
-Biclustering
-~~~~~~~~~~~~
-
-.. autosummary::
-
-   :toctree: ../modules/generated/
-   :template: function.rst
-
-   make_biclusters
-   make_checkerboard
-
-
-Generators for regression
--------------------------
-
-:func:`make_regression` produces regression targets as an optionally-sparse
-random linear combination of random features, with noise. Its informative
-features may be uncorrelated, or low rank (few features account for most of the
-variance).
-
-Other regression generators generate functions deterministically from
-randomized features.  :func:`make_sparse_uncorrelated` produces a target as a
-linear combination of four features with fixed coefficients.
-Others encode explicitly non-linear relations:
-:func:`make_friedman1` is related by polynomial and sine transforms;
-:func:`make_friedman2` includes feature multiplication and reciprocation; and
-:func:`make_friedman3` is similar with an arctan transformation on the target.
-
-Generators for manifold learning
---------------------------------
-
-.. autosummary::
-
-   :toctree: ../modules/generated/
-   :template: function.rst
-
-   make_s_curve
-   make_swiss_roll
-
-Generators for decomposition
-----------------------------
-
-.. autosummary::
-
-   :toctree: ../modules/generated/
-   :template: function.rst
-
-   make_low_rank_matrix
-   make_sparse_coded_signal
-   make_spd_matrix
-   make_sparse_spd_matrix
-
-
-.. _loading_other_datasets:
-
-Loading other datasets
-======================
-
-.. _sample_images:
-
-Sample images
--------------
-
-Scikit-learn also embed a couple of sample JPEG images published under Creative
-Commons license by their authors. Those images can be useful to test algorithms
-and pipeline on 2D data.
-
-.. autosummary::
-
-   :toctree: ../modules/generated/
-   :template: function.rst
-
-   load_sample_images
-   load_sample_image
-
-.. image:: ../auto_examples/cluster/images/sphx_glr_plot_color_quantization_001.png
-   :target: ../auto_examples/cluster/plot_color_quantization.html
-   :scale: 30
-   :align: right
-
-
-.. warning::
-
-  The default coding of images is based on the ``uint8`` dtype to
-  spare memory.  Often machine learning algorithms work best if the
-  input is converted to a floating point representation first.  Also,
-  if you plan to use ``matplotlib.pyplpt.imshow`` don't forget to scale to the range
-  0 - 1 as done in the following example.
-
-.. topic:: Examples:
-
-    * :ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py`
-
-.. _libsvm_loader:
-
-Datasets in svmlight / libsvm format
-------------------------------------
-
-scikit-learn includes utility functions for loading
-datasets in the svmlight / libsvm format. In this format, each line
-takes the form ``<label> <feature-id>:<feature-value>
-<feature-id>:<feature-value> ...``. This format is especially suitable for sparse datasets.
-In this module, scipy sparse CSR matrices are used for ``X`` and numpy arrays are used for ``y``.
-
-You may load a dataset like as follows::
-
-  >>> from sklearn.datasets import load_svmlight_file
-  >>> X_train, y_train = load_svmlight_file("/path/to/train_dataset.txt")
-  ...                                                         # doctest: +SKIP
-
-You may also load two (or more) datasets at once::
-
-  >>> X_train, y_train, X_test, y_test = load_svmlight_files(
-  ...     ("/path/to/train_dataset.txt", "/path/to/test_dataset.txt"))
-  ...                                                         # doctest: +SKIP
-
-In this case, ``X_train`` and ``X_test`` are guaranteed to have the same number
-of features. Another way to achieve the same result is to fix the number of
-features::
-
-  >>> X_test, y_test = load_svmlight_file(
-  ...     "/path/to/test_dataset.txt", n_features=X_train.shape[1])
-  ...                                                         # doctest: +SKIP
-
-.. topic:: Related links:
-
- _`Public datasets in svmlight / libsvm format`: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets
-
- _`Faster API-compatible implementation`: https://github.com/mblondel/svmlight-loader
-
-..
-    For doctests:
-
-    >>> import numpy as np
-    >>> import os
-
-.. _openml:
-
-Downloading datasets from the openml.org repository
----------------------------------------------------
-
-`openml.org <https://openml.org>`_ is a public repository for machine learning
-data and experiments, that allows everybody to upload open datasets.
-
-The ``sklearn.datasets`` package is able to download datasets
-from the repository using the function
-:func:`sklearn.datasets.fetch_openml`.
-
-For example, to download a dataset of gene expressions in mice brains::
-
-  >>> from sklearn.datasets import fetch_openml
-  >>> mice = fetch_openml(name='miceprotein', version=4)
-
-To fully specify a dataset, you need to provide a name and a version, though
-the version is optional, see :ref:`openml_versions` below.
-The dataset contains a total of 1080 examples belonging to 8 different
-classes::
-
-  >>> mice.data.shape
-  (1080, 77)
-  >>> mice.target.shape
-  (1080,)
-  >>> np.unique(mice.target)
-  array(['c-CS-m', 'c-CS-s', 'c-SC-m', 'c-SC-s', 't-CS-m', 't-CS-s', 't-SC-m', 't-SC-s'], dtype=object)
-
-You can get more information on the dataset by looking at the ``DESCR``
-and ``details`` attributes::
-
-  >>> print(mice.DESCR) # doctest: +SKIP
-  **Author**: Clara Higuera, Katheleen J. Gardiner, Krzysztof J. Cios
-  **Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Mice+Protein+Expression) - 2015
-  **Please cite**: Higuera C, Gardiner KJ, Cios KJ (2015) Self-Organizing
-  Feature Maps Identify Proteins Critical to Learning in a Mouse Model of Down
-  Syndrome. PLoS ONE 10(6): e0129126...
-
-  >>> mice.details # doctest: +SKIP
-  {'id': '40966', 'name': 'MiceProtein', 'version': '4', 'format': 'ARFF',
-  'upload_date': '2017-11-08T16:00:15', 'licence': 'Public',
-  'url': 'https://www.openml.org/data/v1/download/17928620/MiceProtein.arff',
-  'file_id': '17928620', 'default_target_attribute': 'class',
-  'row_id_attribute': 'MouseID',
-  'ignore_attribute': ['Genotype', 'Treatment', 'Behavior'],
-  'tag': ['OpenML-CC18', 'study_135', 'study_98', 'study_99'],
-  'visibility': 'public', 'status': 'active',
-  'md5_checksum': '3c479a6885bfa0438971388283a1ce32'}
-
-
-The ``DESCR`` contains a free-text description of the data, while ``details``
-contains a dictionary of meta-data stored by openml, like the dataset id.
-For more details, see the `OpenML documentation
-<https://docs.openml.org/#data>`_ The ``data_id`` of the mice protein dataset
-is 40966, and you can use this (or the name) to get more information on the
-dataset on the openml website::
-
-  >>> mice.url
-  'https://www.openml.org/d/40966'
-
-The ``data_id`` also uniquely identifies a dataset from OpenML::
-
-  >>> mice = fetch_openml(data_id=40966)
-  >>> mice.details # doctest: +SKIP
-  {'id': '4550', 'name': 'MiceProtein', 'version': '1', 'format': 'ARFF',
-  'creator': ...,
-  'upload_date': '2016-02-17T14:32:49', 'licence': 'Public', 'url':
-  'https://www.openml.org/data/v1/download/1804243/MiceProtein.ARFF', 'file_id':
-  '1804243', 'default_target_attribute': 'class', 'citation': 'Higuera C,
-  Gardiner KJ, Cios KJ (2015) Self-Organizing Feature Maps Identify Proteins
-  Critical to Learning in a Mouse Model of Down Syndrome. PLoS ONE 10(6):
-  e0129126. [Web Link] journal.pone.0129126', 'tag': ['OpenML100', 'study_14',
-  'study_34'], 'visibility': 'public', 'status': 'active', 'md5_checksum':
-  '3c479a6885bfa0438971388283a1ce32'}
-
-.. _openml_versions:
-
-Dataset Versions
-~~~~~~~~~~~~~~~~
-
-A dataset is uniquely specified by its ``data_id``, but not necessarily by its
-name. Several different "versions" of a dataset with the same name can exist
-which can contain entirely different datasets.
-If a particular version of a dataset has been found to contain significant
-issues, it might be deactivated. Using a name to specify a dataset will yield
-the earliest version of a dataset that is still active. That means that
-``fetch_openml(name="miceprotein")`` can yield different results at different
-times if earlier versions become inactive.
-You can see that the dataset with ``data_id`` 40966 that we fetched above is
-the version 1 of the "miceprotein" dataset::
-
-  >>> mice.details['version']  #doctest: +SKIP
-  '1'
-
-In fact, this dataset only has one version. The iris dataset on the other hand
-has multiple versions::
-
-  >>> iris = fetch_openml(name="iris")
-  >>> iris.details['version']  #doctest: +SKIP
-  '1'
-  >>> iris.details['id']  #doctest: +SKIP
-  '61'
-
-  >>> iris_61 = fetch_openml(data_id=61)
-  >>> iris_61.details['version']
-  '1'
-  >>> iris_61.details['id']
-  '61'
-
-  >>> iris_969 = fetch_openml(data_id=969)
-  >>> iris_969.details['version']
-  '3'
-  >>> iris_969.details['id']
-  '969'
-
-Specifying the dataset by the name "iris" yields the lowest version, version 1,
-with the ``data_id`` 61. To make sure you always get this exact dataset, it is
-safest to specify it by the dataset ``data_id``. The other dataset, with
-``data_id`` 969, is version 3 (version 2 has become inactive), and contains a
-binarized version of the data::
-
-  >>> np.unique(iris_969.target)
-  array(['N', 'P'], dtype=object)
-
-You can also specify both the name and the version, which also uniquely
-identifies the dataset::
-
-  >>> iris_version_3 = fetch_openml(name="iris", version=3)
-  >>> iris_version_3.details['version']
-  '3'
-  >>> iris_version_3.details['id']
-  '969'
-
-
-.. topic:: References:
-
- * Vanschoren, van Rijn, Bischl and Torgo
-   `"OpenML: networked science in machine learning"
-   <https://arxiv.org/pdf/1407.7722.pdf>`_,
-   ACM SIGKDD Explorations Newsletter, 15(2), 49-60, 2014.
-
-.. _external_datasets:
-
-Loading from external datasets
-------------------------------
-
-scikit-learn works on any numeric data stored as numpy arrays or scipy sparse
-matrices. Other types that are convertible to numeric arrays such as pandas
-DataFrame are also acceptable.
-
-Here are some recommended ways to load standard columnar data into a
-format usable by scikit-learn:
-
-* `pandas.io <https://pandas.pydata.org/pandas-docs/stable/io.html>`_
-  provides tools to read data from common formats including CSV, Excel, JSON
-  and SQL. DataFrames may also be constructed from lists of tuples or dicts.
-  Pandas handles heterogeneous data smoothly and provides tools for
-  manipulation and conversion into a numeric array suitable for scikit-learn.
-* `scipy.io <https://docs.scipy.org/doc/scipy/reference/io.html>`_
-  specializes in binary formats often used in scientific computing
-  context such as .mat and .arff
-* `numpy/routines.io <https://docs.scipy.org/doc/numpy/reference/routines.io.html>`_
-  for standard loading of columnar data into numpy arrays
-* scikit-learn's :func:`datasets.load_svmlight_file` for the svmlight or libSVM
-  sparse format
-* scikit-learn's :func:`datasets.load_files` for directories of text files where
-  the name of each directory is the name of each category and each file inside
-  of each directory corresponds to one sample from that category
-
-For some miscellaneous data such as images, videos, and audio, you may wish to
-refer to:
-
-* `skimage.io <https://scikit-image.org/docs/dev/api/skimage.io.html>`_ or
-  `Imageio <https://imageio.readthedocs.io/en/latest/userapi.html>`_
-  for loading images and videos into numpy arrays
-* `scipy.io.wavfile.read
-  <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.io.wavfile.read.html>`_
-  for reading WAV files into a numpy array
-
-Categorical (or nominal) features stored as strings (common in pandas DataFrames)
-will need converting to numerical features using :class:`sklearn.preprocessing.OneHotEncoder`
-or :class:`sklearn.preprocessing.OrdinalEncoder` or similar.
-See :ref:`preprocessing`.
-
-Note: if you manage your own numerical data it is recommended to use an
-optimized file format such as HDF5 to reduce data load times. Various libraries
-such as H5Py, PyTables and pandas provides a Python interface for reading and
-writing data in that format.
diff -pruN 0.23.2-5/doc/datasets/loading_other_datasets.rst 1.1.1-1/doc/datasets/loading_other_datasets.rst
--- 0.23.2-5/doc/datasets/loading_other_datasets.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/datasets/loading_other_datasets.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,274 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+.. _loading_other_datasets:
+
+Loading other datasets
+======================
+
+.. currentmodule:: sklearn.datasets
+
+.. _sample_images:
+
+Sample images
+-------------
+
+Scikit-learn also embeds a couple of sample JPEG images published under Creative
+Commons license by their authors. Those images can be useful to test algorithms
+and pipelines on 2D data.
+
+.. autosummary::
+
+   load_sample_images
+   load_sample_image
+
+.. image:: ../auto_examples/cluster/images/sphx_glr_plot_color_quantization_001.png
+   :target: ../auto_examples/cluster/plot_color_quantization.html
+   :scale: 30
+   :align: right
+
+
+.. warning::
+
+  The default coding of images is based on the ``uint8`` dtype to
+  spare memory. Often machine learning algorithms work best if the
+  input is converted to a floating point representation first. Also,
+  if you plan to use ``matplotlib.pyplot.imshow``, don't forget to scale to the range
+  0 - 1 as done in the following example.
+
+.. topic:: Examples:
+
+    * :ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py`
+
+.. _libsvm_loader:
+
+Datasets in svmlight / libsvm format
+------------------------------------
+
+scikit-learn includes utility functions for loading
+datasets in the svmlight / libsvm format. In this format, each line
+takes the form ``<label> <feature-id>:<feature-value>
+<feature-id>:<feature-value> ...``. This format is especially suitable for sparse datasets.
+In this module, scipy sparse CSR matrices are used for ``X`` and numpy arrays are used for ``y``.
+
+You may load a dataset as follows::
+
+  >>> from sklearn.datasets import load_svmlight_file
+  >>> X_train, y_train = load_svmlight_file("/path/to/train_dataset.txt")
+  ...                                                         # doctest: +SKIP
+
+You may also load two (or more) datasets at once::
+
+  >>> X_train, y_train, X_test, y_test = load_svmlight_files(
+  ...     ("/path/to/train_dataset.txt", "/path/to/test_dataset.txt"))
+  ...                                                         # doctest: +SKIP
+
+In this case, ``X_train`` and ``X_test`` are guaranteed to have the same number
+of features. Another way to achieve the same result is to fix the number of
+features::
+
+  >>> X_test, y_test = load_svmlight_file(
+  ...     "/path/to/test_dataset.txt", n_features=X_train.shape[1])
+  ...                                                         # doctest: +SKIP
+
+.. topic:: Related links:
+
+ _`Public datasets in svmlight / libsvm format`: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets
+
+ _`Faster API-compatible implementation`: https://github.com/mblondel/svmlight-loader
+
+..
+    For doctests:
+
+    >>> import numpy as np
+    >>> import os
+
+.. _openml:
+
+Downloading datasets from the openml.org repository
+---------------------------------------------------
+
+`openml.org <https://openml.org>`_ is a public repository for machine learning
+data and experiments, that allows everybody to upload open datasets.
+
+The ``sklearn.datasets`` package is able to download datasets
+from the repository using the function
+:func:`sklearn.datasets.fetch_openml`.
+
+For example, to download a dataset of gene expressions in mice brains::
+
+  >>> from sklearn.datasets import fetch_openml
+  >>> mice = fetch_openml(name='miceprotein', version=4)
+
+To fully specify a dataset, you need to provide a name and a version, though
+the version is optional, see :ref:`openml_versions` below.
+The dataset contains a total of 1080 examples belonging to 8 different
+classes::
+
+  >>> mice.data.shape
+  (1080, 77)
+  >>> mice.target.shape
+  (1080,)
+  >>> np.unique(mice.target)
+  array(['c-CS-m', 'c-CS-s', 'c-SC-m', 'c-SC-s', 't-CS-m', 't-CS-s', 't-SC-m', 't-SC-s'], dtype=object)
+
+You can get more information on the dataset by looking at the ``DESCR``
+and ``details`` attributes::
+
+  >>> print(mice.DESCR) # doctest: +SKIP
+  **Author**: Clara Higuera, Katheleen J. Gardiner, Krzysztof J. Cios
+  **Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Mice+Protein+Expression) - 2015
+  **Please cite**: Higuera C, Gardiner KJ, Cios KJ (2015) Self-Organizing
+  Feature Maps Identify Proteins Critical to Learning in a Mouse Model of Down
+  Syndrome. PLoS ONE 10(6): e0129126...
+
+  >>> mice.details # doctest: +SKIP
+  {'id': '40966', 'name': 'MiceProtein', 'version': '4', 'format': 'ARFF',
+  'upload_date': '2017-11-08T16:00:15', 'licence': 'Public',
+  'url': 'https://www.openml.org/data/v1/download/17928620/MiceProtein.arff',
+  'file_id': '17928620', 'default_target_attribute': 'class',
+  'row_id_attribute': 'MouseID',
+  'ignore_attribute': ['Genotype', 'Treatment', 'Behavior'],
+  'tag': ['OpenML-CC18', 'study_135', 'study_98', 'study_99'],
+  'visibility': 'public', 'status': 'active',
+  'md5_checksum': '3c479a6885bfa0438971388283a1ce32'}
+
+
+The ``DESCR`` contains a free-text description of the data, while ``details``
+contains a dictionary of meta-data stored by openml, like the dataset id.
+For more details, see the `OpenML documentation
+<https://docs.openml.org/#data>`_ The ``data_id`` of the mice protein dataset
+is 40966, and you can use this (or the name) to get more information on the
+dataset on the openml website::
+
+  >>> mice.url
+  'https://www.openml.org/d/40966'
+
+The ``data_id`` also uniquely identifies a dataset from OpenML::
+
+  >>> mice = fetch_openml(data_id=40966)
+  >>> mice.details # doctest: +SKIP
+  {'id': '4550', 'name': 'MiceProtein', 'version': '1', 'format': 'ARFF',
+  'creator': ...,
+  'upload_date': '2016-02-17T14:32:49', 'licence': 'Public', 'url':
+  'https://www.openml.org/data/v1/download/1804243/MiceProtein.ARFF', 'file_id':
+  '1804243', 'default_target_attribute': 'class', 'citation': 'Higuera C,
+  Gardiner KJ, Cios KJ (2015) Self-Organizing Feature Maps Identify Proteins
+  Critical to Learning in a Mouse Model of Down Syndrome. PLoS ONE 10(6):
+  e0129126. [Web Link] journal.pone.0129126', 'tag': ['OpenML100', 'study_14',
+  'study_34'], 'visibility': 'public', 'status': 'active', 'md5_checksum':
+  '3c479a6885bfa0438971388283a1ce32'}
+
+.. _openml_versions:
+
+Dataset Versions
+~~~~~~~~~~~~~~~~
+
+A dataset is uniquely specified by its ``data_id``, but not necessarily by its
+name. Several different "versions" of a dataset with the same name can exist
+which can contain entirely different datasets.
+If a particular version of a dataset has been found to contain significant
+issues, it might be deactivated. Using a name to specify a dataset will yield
+the earliest version of a dataset that is still active. That means that
+``fetch_openml(name="miceprotein")`` can yield different results at different
+times if earlier versions become inactive.
+You can see that the dataset with ``data_id`` 40966 that we fetched above is
+the first version of the "miceprotein" dataset::
+
+  >>> mice.details['version']  #doctest: +SKIP
+  '1'
+
+In fact, this dataset only has one version. The iris dataset on the other hand
+has multiple versions::
+
+  >>> iris = fetch_openml(name="iris")
+  >>> iris.details['version']  #doctest: +SKIP
+  '1'
+  >>> iris.details['id']  #doctest: +SKIP
+  '61'
+
+  >>> iris_61 = fetch_openml(data_id=61)
+  >>> iris_61.details['version']
+  '1'
+  >>> iris_61.details['id']
+  '61'
+
+  >>> iris_969 = fetch_openml(data_id=969)
+  >>> iris_969.details['version']
+  '3'
+  >>> iris_969.details['id']
+  '969'
+
+Specifying the dataset by the name "iris" yields the lowest version, version 1,
+with the ``data_id`` 61. To make sure you always get this exact dataset, it is
+safest to specify it by the dataset ``data_id``. The other dataset, with
+``data_id`` 969, is version 3 (version 2 has become inactive), and contains a
+binarized version of the data::
+
+  >>> np.unique(iris_969.target)
+  array(['N', 'P'], dtype=object)
+
+You can also specify both the name and the version, which also uniquely
+identifies the dataset::
+
+  >>> iris_version_3 = fetch_openml(name="iris", version=3)
+  >>> iris_version_3.details['version']
+  '3'
+  >>> iris_version_3.details['id']
+  '969'
+
+
+.. topic:: References:
+
+ * :arxiv:`Vanschoren, van Rijn, Bischl and Torgo. "OpenML: networked science in
+   machine learning" ACM SIGKDD Explorations Newsletter, 15(2), 49-60, 2014.
+   <1407.7722>`
+
+.. _external_datasets:
+
+Loading from external datasets
+------------------------------
+
+scikit-learn works on any numeric data stored as numpy arrays or scipy sparse
+matrices. Other types that are convertible to numeric arrays such as pandas
+DataFrame are also acceptable.
+
+Here are some recommended ways to load standard columnar data into a
+format usable by scikit-learn:
+
+* `pandas.io <https://pandas.pydata.org/pandas-docs/stable/io.html>`_
+  provides tools to read data from common formats including CSV, Excel, JSON
+  and SQL. DataFrames may also be constructed from lists of tuples or dicts.
+  Pandas handles heterogeneous data smoothly and provides tools for
+  manipulation and conversion into a numeric array suitable for scikit-learn.
+* `scipy.io <https://docs.scipy.org/doc/scipy/reference/io.html>`_
+  specializes in binary formats often used in scientific computing
+  context such as .mat and .arff
+* `numpy/routines.io <https://docs.scipy.org/doc/numpy/reference/routines.io.html>`_
+  for standard loading of columnar data into numpy arrays
+* scikit-learn's :func:`datasets.load_svmlight_file` for the svmlight or libSVM
+  sparse format
+* scikit-learn's :func:`datasets.load_files` for directories of text files where
+  the name of each directory is the name of each category and each file inside
+  of each directory corresponds to one sample from that category
+
+For some miscellaneous data such as images, videos, and audio, you may wish to
+refer to:
+
+* `skimage.io <https://scikit-image.org/docs/dev/api/skimage.io.html>`_ or
+  `Imageio <https://imageio.readthedocs.io/en/latest/userapi.html>`_
+  for loading images and videos into numpy arrays
+* `scipy.io.wavfile.read
+  <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.io.wavfile.read.html>`_
+  for reading WAV files into a numpy array
+
+Categorical (or nominal) features stored as strings (common in pandas DataFrames)
+will need converting to numerical features using :class:`~sklearn.preprocessing.OneHotEncoder`
+or :class:`~sklearn.preprocessing.OrdinalEncoder` or similar.
+See :ref:`preprocessing`.
+
+Note: if you manage your own numerical data it is recommended to use an
+optimized file format such as HDF5 to reduce data load times. Various libraries
+such as H5Py, PyTables and pandas provide a Python interface for reading and
+writing data in that format.
diff -pruN 0.23.2-5/doc/datasets/real_world.rst 1.1.1-1/doc/datasets/real_world.rst
--- 0.23.2-5/doc/datasets/real_world.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/datasets/real_world.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,41 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+.. _real_world_datasets:
+
+Real world datasets
+===================
+
+.. currentmodule:: sklearn.datasets
+
+scikit-learn provides tools to load larger datasets, downloading them if
+necessary.
+
+They can be loaded using the following functions:
+
+.. autosummary::
+
+   fetch_olivetti_faces
+   fetch_20newsgroups
+   fetch_20newsgroups_vectorized
+   fetch_lfw_people
+   fetch_lfw_pairs
+   fetch_covtype
+   fetch_rcv1
+   fetch_kddcup99
+   fetch_california_housing
+
+.. include:: ../../sklearn/datasets/descr/olivetti_faces.rst
+
+.. include:: ../../sklearn/datasets/descr/twenty_newsgroups.rst
+
+.. include:: ../../sklearn/datasets/descr/lfw.rst
+
+.. include:: ../../sklearn/datasets/descr/covtype.rst
+
+.. include:: ../../sklearn/datasets/descr/rcv1.rst
+
+.. include:: ../../sklearn/datasets/descr/kddcup99.rst
+
+.. include:: ../../sklearn/datasets/descr/california_housing.rst
diff -pruN 0.23.2-5/doc/datasets/sample_generators.rst 1.1.1-1/doc/datasets/sample_generators.rst
--- 0.23.2-5/doc/datasets/sample_generators.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/datasets/sample_generators.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,112 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+.. _sample_generators:
+
+Generated datasets
+==================
+
+.. currentmodule:: sklearn.datasets
+
+In addition, scikit-learn includes various random sample generators that
+can be used to build artificial datasets of controlled size and complexity.
+
+Generators for classification and clustering
+--------------------------------------------
+
+These generators produce a matrix of features and corresponding discrete
+targets.
+
+Single label
+~~~~~~~~~~~~
+
+Both :func:`make_blobs` and :func:`make_classification` create multiclass
+datasets by allocating each class one or more normally-distributed clusters of
+points.  :func:`make_blobs` provides greater control regarding the centers and
+standard deviations of each cluster, and is used to demonstrate clustering.
+:func:`make_classification` specializes in introducing noise by way of:
+correlated, redundant and uninformative features; multiple Gaussian clusters
+per class; and linear transformations of the feature space.
+
+:func:`make_gaussian_quantiles` divides a single Gaussian cluster into
+near-equal-size classes separated by concentric hyperspheres.
+:func:`make_hastie_10_2` generates a similar binary, 10-dimensional problem.
+
+.. image:: ../auto_examples/datasets/images/sphx_glr_plot_random_dataset_001.png
+   :target: ../auto_examples/datasets/plot_random_dataset.html
+   :scale: 50
+   :align: center
+
+:func:`make_circles` and :func:`make_moons` generate 2d binary classification
+datasets that are challenging to certain algorithms (e.g. centroid-based
+clustering or linear classification), including optional Gaussian noise.
+They are useful for visualization. :func:`make_circles` produces Gaussian data
+with a spherical decision boundary for binary classification, while
+:func:`make_moons` produces two interleaving half circles.
+
+Multilabel
+~~~~~~~~~~
+
+:func:`make_multilabel_classification` generates random samples with multiple
+labels, reflecting a bag of words drawn from a mixture of topics. The number of
+topics for each document is drawn from a Poisson distribution, and the topics
+themselves are drawn from a fixed random distribution. Similarly, the number of
+words is drawn from Poisson, with words drawn from a multinomial, where each
+topic defines a probability distribution over words. Simplifications with
+respect to true bag-of-words mixtures include:
+
+* Per-topic word distributions are independently drawn, where in reality all
+  would be affected by a sparse base distribution, and would be correlated.
+* For a document generated from multiple topics, all topics are weighted
+  equally in generating its bag of words.
+* Documents without labels have words drawn at random, rather than from a base
+  distribution.
+
+.. image:: ../auto_examples/datasets/images/sphx_glr_plot_random_multilabel_dataset_001.png
+   :target: ../auto_examples/datasets/plot_random_multilabel_dataset.html
+   :scale: 50
+   :align: center
+
+Biclustering
+~~~~~~~~~~~~
+
+.. autosummary::
+
+   make_biclusters
+   make_checkerboard
+
+
+Generators for regression
+-------------------------
+
+:func:`make_regression` produces regression targets as an optionally-sparse
+random linear combination of random features, with noise. Its informative
+features may be uncorrelated, or low rank (few features account for most of the
+variance).
+
+Other regression generators generate functions deterministically from
+randomized features.  :func:`make_sparse_uncorrelated` produces a target as a
+linear combination of four features with fixed coefficients.
+Others encode explicitly non-linear relations:
+:func:`make_friedman1` is related by polynomial and sine transforms;
+:func:`make_friedman2` includes feature multiplication and reciprocation; and
+:func:`make_friedman3` is similar with an arctan transformation on the target.
+
+Generators for manifold learning
+--------------------------------
+
+.. autosummary::
+
+   make_s_curve
+   make_swiss_roll
+
+Generators for decomposition
+----------------------------
+
+.. autosummary::
+
+   make_low_rank_matrix
+   make_sparse_coded_signal
+   make_spd_matrix
+   make_sparse_spd_matrix
diff -pruN 0.23.2-5/doc/datasets/toy_dataset.rst 1.1.1-1/doc/datasets/toy_dataset.rst
--- 0.23.2-5/doc/datasets/toy_dataset.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/datasets/toy_dataset.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,43 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+.. _toy_datasets:
+
+Toy datasets
+============
+
+.. currentmodule:: sklearn.datasets
+
+scikit-learn comes with a few small standard datasets that do not require
+downloading any file from an external website.
+
+They can be loaded using the following functions:
+
+.. autosummary::
+
+   load_boston
+   load_iris
+   load_diabetes
+   load_digits
+   load_linnerud
+   load_wine
+   load_breast_cancer
+
+These datasets are useful to quickly illustrate the behavior of the
+various algorithms implemented in scikit-learn. They are however often too
+small to be representative of real world machine learning tasks.
+
+.. include:: ../../sklearn/datasets/descr/boston_house_prices.rst
+
+.. include:: ../../sklearn/datasets/descr/iris.rst
+
+.. include:: ../../sklearn/datasets/descr/diabetes.rst
+
+.. include:: ../../sklearn/datasets/descr/digits.rst
+
+.. include:: ../../sklearn/datasets/descr/linnerud.rst
+
+.. include:: ../../sklearn/datasets/descr/wine_data.rst
+
+.. include:: ../../sklearn/datasets/descr/breast_cancer.rst
diff -pruN 0.23.2-5/doc/datasets.rst 1.1.1-1/doc/datasets.rst
--- 0.23.2-5/doc/datasets.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/datasets.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,71 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+.. include:: includes/big_toc_css.rst
+
+.. _datasets:
+
+=========================
+Dataset loading utilities
+=========================
+
+.. currentmodule:: sklearn.datasets
+
+The ``sklearn.datasets`` package embeds some small toy datasets
+as introduced in the :ref:`Getting Started <loading_example_dataset>` section.
+
+This package also features helpers to fetch larger datasets commonly
+used by the machine learning community to benchmark algorithms on data
+that comes from the 'real world'.
+
+To evaluate the impact of the scale of the dataset (``n_samples`` and
+``n_features``) while controlling the statistical properties of the data
+(typically the correlation and informativeness of the features), it is
+also possible to generate synthetic data.
+
+**General dataset API.** There are three main kinds of dataset interfaces that
+can be used to get datasets depending on the desired type of dataset.
+
+**The dataset loaders.** They can be used to load small standard datasets,
+described in the :ref:`toy_datasets` section.
+
+**The dataset fetchers.** They can be used to download and load larger datasets,
+described in the :ref:`real_world_datasets` section.
+
+Both loaders and fetchers functions return a :class:`~sklearn.utils.Bunch`
+object holding at least two items:
+an array of shape ``n_samples`` * ``n_features`` with
+key ``data`` (except for 20newsgroups) and a numpy array of
+length ``n_samples``, containing the target values, with key ``target``.
+
+The Bunch object is a dictionary that exposes its keys as attributes.
+For more information about Bunch object, see :class:`~sklearn.utils.Bunch`.
+
+It's also possible for almost all of these functions to constrain the output
+to be a tuple containing only the data and the target, by setting the
+``return_X_y`` parameter to ``True``.
+
+The datasets also contain a full description in their ``DESCR`` attribute and
+some contain ``feature_names`` and ``target_names``. See the dataset
+descriptions below for details.
+
+**The dataset generation functions.** They can be used to generate controlled
+synthetic datasets, described in the :ref:`sample_generators` section.
+
+These functions return a tuple ``(X, y)`` consisting of a ``n_samples`` *
+``n_features`` numpy array ``X`` and an array of length ``n_samples``
+containing the targets ``y``.
+
+In addition, there are also miscellaneous tools to load datasets of other
+formats or from other locations, described in the :ref:`loading_other_datasets`
+section.
+
+
+.. toctree::
+    :maxdepth: 2
+
+    datasets/toy_dataset
+    datasets/real_world
+    datasets/sample_generators
+    datasets/loading_other_datasets
diff -pruN 0.23.2-5/doc/data_transforms.rst 1.1.1-1/doc/data_transforms.rst
--- 0.23.2-5/doc/data_transforms.rst	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/data_transforms.rst	2022-05-19 12:16:26.432781500 +0000
@@ -1,3 +1,7 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
 .. include:: includes/big_toc_css.rst
 
 .. _data-transforms:
diff -pruN 0.23.2-5/doc/developers/advanced_installation.rst 1.1.1-1/doc/developers/advanced_installation.rst
--- 0.23.2-5/doc/developers/advanced_installation.rst	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/developers/advanced_installation.rst	2022-05-19 12:16:26.432781500 +0000
@@ -1,11 +1,13 @@
 
 .. _advanced-installation:
 
+.. include:: ../min_dependency_substitutions.rst
+
 ==================================================
 Installing the development version of scikit-learn
 ==================================================
 
-This section introduces how to install the **master branch** of scikit-learn.
+This section introduces how to install the **main branch** of scikit-learn.
 This can be done by either installing a nightly build or building from source.
 
 .. _install_nightly_builds:
@@ -20,11 +22,11 @@ basis.
 Installing a nightly build is the quickest way to:
 
 - try a new feature that will be shipped in the next release (that is, a
-  feature from a pull-request that was recently merged to the master branch);
+  feature from a pull-request that was recently merged to the main branch);
 
 - check whether a bug you encountered has been fixed since the last release.
 
-::
+.. prompt:: bash $
 
   pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
 
@@ -41,30 +43,56 @@ feature, code or documentation improveme
 
 #. Use `Git <https://git-scm.com/>`_ to check out the latest source from the
    `scikit-learn repository <https://github.com/scikit-learn/scikit-learn>`_ on
-   Github.::
+   Github.:
+
+   .. prompt:: bash $
 
-        git clone git://github.com/scikit-learn/scikit-learn.git  # add --depth 1 if your connection is slow
-        cd scikit-learn
+     git clone git://github.com/scikit-learn/scikit-learn.git  # add --depth 1 if your connection is slow
+     cd scikit-learn
 
    If you plan on submitting a pull-request, you should clone from your fork
    instead.
 
+#. Install a recent version of Python (3.9 is recommended at the time of writing)
+   for instance using Miniforge3_. Miniforge provides a conda-based distribution
+   of Python and the most popular scientific libraries.
+
+   If you installed Python with conda, we recommend to create a dedicated
+   `conda environment`_ with all the build dependencies of scikit-learn
+   (namely NumPy_, SciPy_, and Cython_):
+
+   .. prompt:: bash $
+
+     conda create -n sklearn-env -c conda-forge python=3.9 numpy scipy cython
+     conda activate sklearn-env
+
+#. **Alternative to conda:** If you run Linux or similar, you can instead use
+   your system's Python provided it is recent enough (3.8 or higher
+   at the time of writing). In this case, we recommend to create a dedicated
+   virtualenv_ and install the scikit-learn build dependencies with pip:
+
+   .. prompt:: bash $
+
+     python3 -m venv sklearn-env
+     source sklearn-env/bin/activate
+     pip install wheel numpy scipy cython
+
 #. Install a compiler with OpenMP_ support for your platform. See instructions
    for :ref:`compiler_windows`, :ref:`compiler_macos`, :ref:`compiler_linux`
    and :ref:`compiler_freebsd`.
 
-#. Optional (but recommended): create and activate a dedicated virtualenv_
-   or `conda environment`_.
+#. Build the project with pip in :ref:`editable_mode`:
 
-#. Install Cython_ and build the project with pip in :ref:`editable_mode`::
+   .. prompt:: bash $
 
-        pip install cython
-        pip install --verbose --no-build-isolation --editable .
+     pip install --verbose --no-build-isolation --editable .
 
 #. Check that the installed scikit-learn has a version number ending with
-   `.dev0`::
+   `.dev0`:
 
-    python -c "import sklearn; sklearn.show_versions()"
+   .. prompt:: bash $
+
+     python -c "import sklearn; sklearn.show_versions()"
 
 #. Please refer to the :ref:`developers_guide` and :ref:`pytest_tips` to run
    the tests on the module of your choice.
@@ -86,19 +114,11 @@ Runtime dependencies
 Scikit-learn requires the following dependencies both at build time and at
 runtime:
 
-- Python (>= 3.6),
-- NumPy (>= 1.13.3),
-- SciPy (>= 0.19),
-- Joblib (>= 0.11),
-- threadpoolctl (>= 2.0.0).
-
-Those dependencies are **automatically installed by pip** if they were missing
-when building scikit-learn from source.
-
-.. note::
-
-   For running on PyPy, PyPy3-v5.10+, Numpy 1.14.0+, and scipy 1.1.0+
-   are required. For PyPy, only installation instructions with pip apply.
+- Python (>= 3.8),
+- NumPy (>= |NumpyMinVersion|),
+- SciPy (>= |ScipyMinVersion|),
+- Joblib (>= |JoblibMinVersion|),
+- threadpoolctl (>= |ThreadpoolctlMinVersion|).
 
 Build dependencies
 ~~~~~~~~~~~~~~~~~~
@@ -111,7 +131,7 @@ Building Scikit-learn also requires:
     # - sklearn/_build_utils/__init__.py
     # - advanced installation guide
 
-- Cython >= 0.28.5
+- Cython >= |CythonMinVersion|
 - A C/C++ compiler and a matching OpenMP_ runtime library. See the
   :ref:`platform system specific instructions
   <platform_specific_instructions>` for more details.
@@ -125,8 +145,8 @@ Building Scikit-learn also requires:
    (before cythonization) will force the build to fail if OpenMP is not
    supported.
 
-Since version 0.21, scikit-learn automatically detects and use the linear
-algebrea library used by SciPy **at runtime**. Scikit-learn has therefore no
+Since version 0.21, scikit-learn automatically detects and uses the linear
+algebra library used by SciPy **at runtime**. Scikit-learn has therefore no
 build dependency on BLAS/LAPACK implementations such as OpenBlas, Atlas, Blis
 or MKL.
 
@@ -135,9 +155,7 @@ Test dependencies
 
 Running tests requires:
 
-.. |PytestMinVersion| replace:: 4.6.2
-
-- pytest >=\ |PytestMinVersion|
+- pytest >= |PytestMinVersion|
 
 Some tests also require `pandas <https://pandas.pydata.org>`_.
 
@@ -182,26 +200,31 @@ to build scikit-learn Cython extensions
 Windows
 -------
 
-First, install `Build Tools for Visual Studio 2019
-<https://visualstudio.microsoft.com/downloads/>`_.
+First, download the `Build Tools for Visual Studio 2019 installer
+<https://aka.ms/vs/17/release/vs_buildtools.exe>`_.
 
-.. warning::
+Run the downloaded `vs_buildtools.exe` file, during the installation you will
+need to make sure you select "Desktop development with C++", similarly to this
+screenshot:
 
-    You DO NOT need to install Visual Studio 2019. You only need the "Build
-    Tools for Visual Studio 2019", under "All downloads" -> "Tools for Visual
-    Studio 2019".
+.. image:: ../images/visual-studio-build-tools-selection.png
 
 Secondly, find out if you are running 64-bit or 32-bit Python. The building
 command depends on the architecture of the Python interpreter. You can check
 the architecture by running the following in ``cmd`` or ``powershell``
-console::
+console:
+
+.. prompt:: bash $
 
     python -c "import struct; print(struct.calcsize('P') * 8)"
 
-For 64-bit Python, configure the build environment with::
+For 64-bit Python, configure the build environment by running the following
+commands in ``cmd`` or an Anaconda Prompt (if you use Anaconda):
 
-    SET DISTUTILS_USE_SDK=1
-    "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64
+    ::
+
+      $ SET DISTUTILS_USE_SDK=1
+      $ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64
 
 Replace ``x64`` by ``x86`` to build for 32-bit Python.
 
@@ -209,7 +232,9 @@ Please be aware that the path above migh
 aim is to point to the "vcvarsall.bat" file that will set the necessary
 environment variables in the current command prompt.
 
-Finally, build scikit-learn from this command prompt::
+Finally, build scikit-learn from this command prompt:
+
+.. prompt:: bash $
 
     pip install --verbose --no-build-isolation --editable .
 
@@ -226,6 +251,11 @@ to enable OpenMP support:
 
 - or install `libomp` with Homebrew to extend the default Apple clang compiler.
 
+For Apple Silicon M1 hardware, only the conda-forge method below is known to
+work at the time of writing (January 2021). You can install the `macos/arm64`
+distribution of conda using the `miniforge installer
+<https://github.com/conda-forge/miniforge#miniforge>`_
+
 macOS compilers from conda-forge
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -233,15 +263,19 @@ If you use the conda package manager (ve
 ``compilers`` meta-package from the conda-forge channel, which provides
 OpenMP-enabled C/C++ compilers based on the llvm toolchain.
 
-First install the macOS command line tools::
+First install the macOS command line tools:
+
+.. prompt:: bash $
 
     xcode-select --install
 
 It is recommended to use a dedicated `conda environment`_ to build
-scikit-learn from source::
+scikit-learn from source:
+
+.. prompt:: bash $
 
-    conda create -n sklearn-dev python numpy scipy cython joblib pytest \
-        "conda-forge::compilers>=1.0.4" conda-forge::llvm-openmp
+    conda create -n sklearn-dev -c conda-forge python numpy scipy cython \
+        joblib threadpoolctl pytest compilers llvm-openmp
     conda activate sklearn-dev
     make clean
     pip install --verbose --no-build-isolation --editable .
@@ -254,14 +288,18 @@ scikit-learn from source::
     problems for this setup.
 
 You can check that the custom compilers are properly installed from conda
-forge using the following command::
+forge using the following command:
 
-    conda list 
+.. prompt:: bash $
+
+    conda list
 
 which should include ``compilers`` and ``llvm-openmp``.
 
 The compilers meta-package will automatically set custom environment
-variables::
+variables:
+
+.. prompt:: bash $
 
     echo $CC
     echo $CXX
@@ -283,17 +321,23 @@ macOS compilers from Homebrew
 Another solution is to enable OpenMP support for the clang compiler shipped
 by default on macOS.
 
-First install the macOS command line tools::
+First install the macOS command line tools:
+
+.. prompt:: bash $
 
     xcode-select --install
 
 Install the Homebrew_ package manager for macOS.
 
-Install the LLVM OpenMP library::
+Install the LLVM OpenMP library:
+
+.. prompt:: bash $
 
     brew install libomp
 
-Set the following environment variables::
+Set the following environment variables:
+
+.. prompt:: bash $
 
     export CC=/usr/bin/clang
     export CXX=/usr/bin/clang++
@@ -303,7 +347,9 @@ Set the following environment variables:
     export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib -L/usr/local/opt/libomp/lib -lomp"
 
 Finally, build scikit-learn in verbose mode (to check for the presence of the
-``-fopenmp`` flag in the compiler commands)::
+``-fopenmp`` flag in the compiler commands):
+
+.. prompt:: bash $
 
     make clean
     pip install --verbose --no-build-isolation --editable .
@@ -321,11 +367,15 @@ installed the scikit-learn Python develo
 compiler with OpenMP support (typically the GCC toolchain).
 
 Install build dependencies for Debian-based operating systems, e.g.
-Ubuntu::
+Ubuntu:
+
+.. prompt:: bash $
 
     sudo apt-get install build-essential python3-dev python3-pip
 
-then proceed as usual::
+then proceed as usual:
+
+.. prompt:: bash $
 
     pip3 install cython
     pip3 install --verbose --editable .
@@ -338,12 +388,16 @@ isolation from the Python packages insta
 using an isolated environment, ``pip3`` should be replaced by ``pip`` in the
 above commands.
 
-When precompiled wheels of the runtime dependencies are not avalaible for your
-architecture (e.g. ARM), you can install the system versions::
+When precompiled wheels of the runtime dependencies are not available for your
+architecture (e.g. ARM), you can install the system versions:
+
+.. prompt:: bash $
 
     sudo apt-get install cython3 python3-numpy python3-scipy
 
-On Red Hat and clones (e.g. CentOS), install the dependencies using::
+On Red Hat and clones (e.g. CentOS), install the dependencies using:
+
+.. prompt:: bash $
 
     sudo yum -y install gcc gcc-c++ python3-devel numpy scipy
 
@@ -351,9 +405,12 @@ Linux compilers from conda-forge
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Alternatively, install a recent version of the GNU C Compiler toolchain (GCC)
-in the user folder using conda::
+in the user folder using conda:
 
-    conda create -n sklearn-dev numpy scipy joblib cython conda-forge::compilers
+.. prompt:: bash $
+
+    conda create -n sklearn-dev -c conda-forge python numpy scipy cython \
+        joblib threadpoolctl pytest compilers
     conda activate sklearn-dev
     pip install --verbose --no-build-isolation --editable .
 
@@ -364,19 +421,25 @@ FreeBSD
 
 The clang compiler included in FreeBSD 12.0 and 11.2 base systems does not
 include OpenMP support. You need to install the `openmp` library from packages
-(or ports)::
+(or ports):
+
+.. prompt:: bash $
 
     sudo pkg install openmp
 
 This will install header files in ``/usr/local/include`` and libs in
 ``/usr/local/lib``. Since these directories are not searched by default, you
-can set the environment variables to these locations::
+can set the environment variables to these locations:
+
+.. prompt:: bash $
 
     export CFLAGS="$CFLAGS -I/usr/local/include"
     export CXXFLAGS="$CXXFLAGS -I/usr/local/include"
     export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/lib -L/usr/local/lib -lomp"
 
-Finally, build the package using the standard command::
+Finally, build the package using the standard command:
+
+.. prompt:: bash $
 
     pip install --verbose --no-build-isolation --editable .
 
@@ -385,6 +448,92 @@ the base system and these steps will not
 
 .. _OpenMP: https://en.wikipedia.org/wiki/OpenMP
 .. _Cython: https://cython.org
+.. _NumPy: https://numpy.org
+.. _SciPy: https://www.scipy.org
 .. _Homebrew: https://brew.sh
 .. _virtualenv: https://docs.python.org/3/tutorial/venv.html
 .. _conda environment: https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html
+.. _Miniforge3: https://github.com/conda-forge/miniforge#miniforge3
+
+Alternative compilers
+=====================
+
+The command:
+
+.. prompt:: bash $
+
+    pip install --verbose --editable .
+
+will build scikit-learn using your default C/C++ compiler. If you want to build
+scikit-learn with another compiler handled by ``distutils`` or by
+``numpy.distutils``, use the following command:
+
+.. prompt:: bash $
+
+    python setup.py build_ext --compiler=<compiler> -i build_clib --compiler=<compiler>
+
+To see the list of available compilers run:
+
+.. prompt:: bash $
+
+    python setup.py build_ext --help-compiler
+
+If your compiler is not listed here, you can specify it via the ``CC`` and
+``LDSHARED`` environment variables (does not work on windows):
+
+.. prompt:: bash $
+
+    CC=<compiler> LDSHARED="<compiler> -shared" python setup.py build_ext -i
+
+Building with Intel C Compiler (ICC) using oneAPI on Linux
+----------------------------------------------------------
+
+Intel provides access to all of its oneAPI toolkits and packages through a
+public APT repository. First you need to get and install the public key of this
+repository:
+
+.. prompt:: bash $
+
+    wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+    sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+    rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+
+Then, add the oneAPI repository to your APT repositories:
+
+.. prompt:: bash $
+
+    sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+    sudo apt-get update
+
+Install ICC, packaged under the name
+``intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic``:
+
+.. prompt:: bash $
+
+    sudo apt-get install intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic
+
+Before using ICC, you need to set up environment variables:
+
+.. prompt:: bash $
+
+    source /opt/intel/oneapi/setvars.sh
+
+Finally, you can build scikit-learn. For example on Linux x86_64:
+
+.. prompt:: bash $
+
+    python setup.py build_ext --compiler=intelem -i build_clib --compiler=intelem
+
+Parallel builds
+===============
+
+It is possible to build scikit-learn compiled extensions in parallel by setting
+an environment variable as follows before calling the ``pip install`` or
+``python setup.py build_ext`` commands::
+
+    export SKLEARN_BUILD_PARALLEL=3
+    pip install --verbose --no-build-isolation --editable .
+
+On a machine with 2 CPU cores, it can be beneficial to use a parallelism level
+of 3 to overlap IO bound tasks (reading and writing files on disk) with CPU
+bound tasks (actually compiling).
diff -pruN 0.23.2-5/doc/developers/bug_triaging.rst 1.1.1-1/doc/developers/bug_triaging.rst
--- 0.23.2-5/doc/developers/bug_triaging.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/developers/bug_triaging.rst	2022-05-19 12:16:26.432781500 +0000
@@ -0,0 +1,159 @@
+.. _bug_triaging:
+
+Bug triaging and issue curation
+===============================
+
+The `issue tracker <https://github.com/scikit-learn/scikit-learn/issues>`_
+is important to the communication in the project: it helps
+developers identify major projects to work on, as well as to discuss
+priorities. For this reason, it is important to curate it, adding labels
+to issues and closing issues that are not necessary.
+
+Working on issues to improve them
+---------------------------------
+
+Improving issues increases their chances of being successfully resolved.
+Guidelines on submitting good issues can be found :ref:`here
+<filing_bugs>`.
+A third party can give useful feedback or even add
+comments on the issue.
+The following actions are typically useful:
+
+  - documenting issues that are missing elements to reproduce the problem
+    such as code samples
+
+  - suggesting better use of code formatting
+
+  - suggesting to reformulate the title and description to make them more
+    explicit about the problem to be solved
+
+  - linking to related issues or discussions while briefly describing how
+    they are related, for instance "See also #xyz for a similar attempt
+    at this" or "See also #xyz where the same thing happened in
+    SomeEstimator" provides context and helps the discussion.
+
+.. topic:: Fruitful discussions
+
+   Online discussions may be harder than it seems at first glance, in
+   particular given that a person new to open-source may have a very
+   different understanding of the process than a seasoned maintainer.
+
+   Overall, it is useful to stay positive and assume good will. `The
+   following article
+   <http://gael-varoquaux.info/programming/technical-discussions-are-hard-a-few-tips.html>`_
+   explores how to lead online discussions in the context of open source.
+
+Working on PRs to help review
+-----------------------------
+
+Reviewing code is also encouraged. Contributors and users are welcome to
+participate in the review process following our :ref:`review guidelines
+<code_review>`.
+
+Triaging operations for members of the core and contributor experience teams
+----------------------------------------------------------------------------
+
+In addition to the above, members of the core team and the contributor experience team
+can do the following important tasks:
+
+- Update :ref:`labels for issues and PRs <issue_tracker_tags>`: see the list of
+  the `available github labels
+  <https://github.com/scikit-learn/scikit-learn/labels>`_.
+
+- :ref:`Determine if a PR must be relabeled as stalled <stalled_pull_request>`
+  or needs help (this is typically very important in the context
+  of sprints, where the risk is to create many unfinished PRs)
+
+- If a stalled PR is taken over by a newer PR, then label the stalled PR as
+  "Superseded", leave a comment on the stalled PR linking to the new PR, and
+  likely close the stalled PR.
+
+- Triage issues:
+
+  - **close usage questions** and politely point the reporter to use
+    Stack Overflow instead.
+
+  - **close duplicate issues**, after checking that they are
+    indeed duplicate. Ideally, the original submitter moves the
+    discussion to the older, duplicate issue
+
+  - **close issues that cannot be replicated**, after leaving time (at
+    least a week) to add extra information
+
+:ref:`Saved replies <saved_replies>` are useful to gain time and yet be
+welcoming and polite when triaging.
+
+See the github description for `roles in the organization
+<https://docs.github.com/en/github/setting-up-and-managing-organizations-and-teams/repository-permission-levels-for-an-organization>`_.
+
+.. topic:: Closing issues: a tough call
+
+    When uncertain on whether an issue should be closed or not, it is
+    best to strive for consensus with the original poster, and possibly
+    to seek relevant expertise. However, when the issue is a usage
+    question, or when it has been considered as unclear for many years it
+    should be closed.
+
+A typical workflow for triaging issues
+--------------------------------------
+
+The following workflow [1]_ is a good way to approach issue triaging:
+
+#. Thank the reporter for opening an issue
+
+   The issue tracker is many people’s first interaction with the
+   scikit-learn project itself, beyond just using the library. As such,
+   we want it to be a welcoming, pleasant experience.
+
+#. Is this a usage question? If so close it with a polite message
+   (:ref:`here is an example <saved_replies>`).
+
+#. Is the necessary information provided?
+
+   If crucial information (like the version of scikit-learn used), is
+   missing feel free to ask for that and label the issue with "Needs
+   info".
+
+#. Is this a duplicate issue?
+
+   We have many open issues. If a new issue seems to be a duplicate,
+   point to the original issue. If it is a clear duplicate, or consensus
+   is that it is redundant, close it. Make sure to still thank the
+   reporter, and encourage them to chime in on the original issue, and
+   perhaps try to fix it.
+
+   If the new issue provides relevant information, such as a better or
+   slightly different example, add it to the original issue as a comment
+   or an edit to the original post.
+
+#. Make sure that the title accurately reflects the issue. If you have the
+   necessary permissions edit it yourself if it's not clear.
+
+#. Is the issue minimal and reproducible?
+
+   For bug reports, we ask that the reporter provide a minimal
+   reproducible example. See `this useful post
+   <https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports>`_
+   by Matthew Rocklin for a good explanation. If the example is not
+   reproducible, or if it's clearly not minimal, feel free to ask the reporter
+   if they can provide an example or simplify the provided one.
+   Do acknowledge that writing minimal reproducible examples is hard work.
+   If the reporter is struggling, you can try to write one yourself.
+
+   If a reproducible example is provided, but you see a simplification,
+   add your simpler reproducible example.
+
+#. Add the relevant labels, such as "Documentation" when the issue is
+   about documentation, "Bug" if it is clearly a bug, "Enhancement" if it
+   is an enhancement request, ...
+
+   If the issue is clearly defined and the fix seems relatively
+   straightforward, label the issue as “Good first issue”.
+
+   An additional useful step can be to tag the corresponding module e.g.
+   `sklearn.linear_model` when relevant.
+
+#. Remove the "Needs Triage" label from the issue if the label exists.
+
+.. [1] Adapted from the pandas project `maintainers guide
+       <https://dev.pandas.io/docs/development/maintaining.html>`_
diff -pruN 0.23.2-5/doc/developers/contributing.rst 1.1.1-1/doc/developers/contributing.rst
--- 0.23.2-5/doc/developers/contributing.rst	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/developers/contributing.rst	2022-05-19 12:16:26.436782000 +0000
@@ -4,6 +4,8 @@
 Contributing
 ============
 
+.. currentmodule:: sklearn
+
 This project is a community effort, and everyone is welcome to
 contribute.
 
@@ -22,7 +24,10 @@ See :ref:`new_contributors` to get start
     We are a community based on openness and friendly, didactic,
     discussions.
 
-    We aspire to treat everybody equally, and value their contributions.
+    We aspire to treat everybody equally, and value their contributions.  We
+    are particularly seeking people from underrepresented backgrounds in Open
+    Source Software and scikit-learn in particular to participate and
+    contribute their expertise and experience.
 
     Decisions are made based on technical merit and consensus.
 
@@ -52,11 +57,11 @@ find a typo in the documentation, or hav
 send an email to the mailing list or preferably submit a GitHub pull request.
 Full documentation can be found under the doc/ directory.
 
-But there are many other ways to help. In particular answering queries on the
-`issue tracker <https://github.com/scikit-learn/scikit-learn/issues>`_,
-investigating bugs, and :ref:`reviewing other developers' pull requests
-<code_review>` are very valuable contributions that decrease the burden on the
-project maintainers.
+But there are many other ways to help. In particular helping to
+:ref:`improve, triage, and investigate issues <bug_triaging>` and
+:ref:`reviewing other developers' pull requests <code_review>` are very
+valuable contributions that decrease the burden on the project
+maintainers.
 
 Another way to contribute is to report issues you're facing, and give a "thumbs
 up" on issues that others reported and that are relevant to you.  It also helps
@@ -136,7 +141,8 @@ When you submit an issue to `Github
 follow these guidelines! This will make it a lot easier to provide you with good
 feedback:
 
-- The ideal bug report contains a **short reproducible code snippet**, this way
+- The ideal bug report contains a :ref:`short reproducible code snippet
+  <minimal_reproducer>`, this way
   anyone can try to reproduce the bug easily (see `this
   <https://stackoverflow.com/help/mcve>`_ for more details). If your snippet is
   longer than around 50 lines, please link to a `gist
@@ -154,22 +160,14 @@ feedback:
     >>> import sklearn
     >>> sklearn.show_versions()  # doctest: +SKIP
 
-  .. note::
-
-    This utility function is only available in scikit-learn v0.20+.
-    For previous versions, one has to explicitly run::
-
-     import platform; print(platform.platform())
-     import sys; print("Python", sys.version)
-     import numpy; print("NumPy", numpy.__version__)
-     import scipy; print("SciPy", scipy.__version__)
-     import sklearn; print("Scikit-Learn", sklearn.__version__)
-
 - Please ensure all **code snippets and error messages are formatted in
   appropriate code blocks**.  See `Creating and highlighting code blocks
   <https://help.github.com/articles/creating-and-highlighting-code-blocks>`_
   for more details.
 
+If you want to help curate issues, read :ref:`the following
+<bug_triaging>`.
+
 Contributing code
 =================
 
@@ -186,7 +184,41 @@ Contributing code
   One easy way to find an issue to work on is by applying the "help wanted"
   label in your search. This lists all the issues that have been unclaimed
   so far. In order to claim an issue for yourself, please comment exactly
-  ``take`` on it for the CI to automatically assign the issue to you.
+  ``/take`` on it for the CI to automatically assign the issue to you.
+
+Video resources
+---------------
+These videos are step-by-step introductions on how to contribute to
+scikit-learn, and are a great companion to the following text guidelines.
+Please make sure to still check our guidelines below, since they describe our
+latest up-to-date workflow.
+
+- Crash Course in Contributing to Scikit-Learn & Open Source Projects:
+  `Video <https://youtu.be/5OL8XoMMOfA>`__,
+  `Transcript
+  <https://github.com/data-umbrella/event-transcripts/blob/main/2020/05-andreas-mueller-contributing.md>`__
+
+- Example of Submitting a Pull Request to scikit-learn:
+  `Video <https://youtu.be/PU1WyDPGePI>`__,
+  `Transcript
+  <https://github.com/data-umbrella/event-transcripts/blob/main/2020/06-reshama-shaikh-sklearn-pr.md>`__
+
+- Sprint-specific instructions and practical tips:
+  `Video <https://youtu.be/p_2Uw2BxdhA>`__,
+  `Transcript
+  <https://github.com/data-umbrella/data-umbrella-scikit-learn-sprint/blob/master/3_transcript_ACM_video_vol2.md>`__
+
+- 3 Components of Reviewing a Pull Request:
+  `Video <https://youtu.be/dyxS9KKCNzA>`__,
+  `Transcript
+  <https://github.com/data-umbrella/event-transcripts/blob/main/2021/27-thomas-pr.md>`__
+
+.. note::
+  In January 2021, the default branch name changed from ``master`` to ``main``
+  for the scikit-learn GitHub repository to use more inclusive terms.
+  These videos were created prior to the renaming of the branch.
+  For contributors who are viewing these videos to set up their
+  working environment and submitting a PR, ``master`` should be replaced with ``main``.
 
 How to contribute
 -----------------
@@ -208,66 +240,88 @@ how to set up your git repository:
    repository see `this guide <https://help.github.com/articles/fork-a-repo/>`_.
 
 3. Clone your fork of the scikit-learn repo from your GitHub account to your
-   local disk::
+   local disk:
 
-       $ git clone git@github.com:YourLogin/scikit-learn.git  # add --depth 1 if your connection is slow
-       $ cd scikit-learn
+   .. prompt:: bash $
 
-4. Install the development dependencies::
+      git clone git@github.com:YourLogin/scikit-learn.git  # add --depth 1 if your connection is slow
+      cd scikit-learn
 
-       $ pip install cython pytest pytest-cov flake8 mypy
+3. Follow steps 2-7 in :ref:`install_bleeding_edge` to build scikit-learn in
+   development mode and return to this document.
 
-5. Install scikit-learn in editable mode::
+4. Install the development dependencies:
 
-       $ pip install --no-build-isolation --editable .
+   .. prompt:: bash $
 
-   for more details about advanced installation, see the
-   :ref:`install_bleeding_edge` section.
+        pip install pytest pytest-cov flake8 mypy numpydoc black==22.3.0
 
 .. _upstream:
 
-6. Add the ``upstream`` remote. This saves a reference to the main
+5. Add the ``upstream`` remote. This saves a reference to the main
    scikit-learn repository, which you can use to keep your repository
-   synchronized with the latest changes::
+   synchronized with the latest changes:
+
+   .. prompt:: bash $
 
-    $ git remote add upstream https://github.com/scikit-learn/scikit-learn.git
+        git remote add upstream git@github.com:scikit-learn/scikit-learn.git
+
+6. Check that the `upstream` and `origin` remote aliases are configured correctly
+   by running `git remote -v` which should display::
+
+        origin	git@github.com:YourLogin/scikit-learn.git (fetch)
+        origin	git@github.com:YourLogin/scikit-learn.git (push)
+        upstream	git@github.com:scikit-learn/scikit-learn.git (fetch)
+        upstream	git@github.com:scikit-learn/scikit-learn.git (push)
 
 You should now have a working installation of scikit-learn, and your git
 repository properly configured. The next steps now describe the process of
 modifying code and submitting a PR:
 
-7. Synchronize your master branch with the upstream master branch::
+7. Synchronize your ``main`` branch with the ``upstream/main`` branch,
+   more details on `GitHub Docs <https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/syncing-a-fork>`_:
+
+   .. prompt:: bash $
 
-        $ git checkout master
-        $ git pull upstream master
+        git checkout main
+        git fetch upstream
+        git merge upstream/main
 
-8. Create a feature branch to hold your development changes::
+8. Create a feature branch to hold your development changes:
 
-        $ git checkout -b my_feature
+    .. prompt:: bash $
+
+        git checkout -b my_feature
 
    and start making changes. Always use a feature branch. It's good
-   practice to never work on the ``master`` branch!
+   practice to never work on the ``main`` branch!
 
 9. (**Optional**) Install `pre-commit <https://pre-commit.com/#install>`_ to
-   run code style checks before each commit::
+   run code style checks before each commit:
+
+   .. prompt:: bash $
 
-        $ pip install pre-commit
-        $ pre-commit install
+        pip install pre-commit
+        pre-commit install
 
    pre-commit checks can be disabled for a particular commit with
    `git commit -n`.
 
 10. Develop the feature on your feature branch on your computer, using Git to
     do the version control. When you're done editing, add changed files using
-    ``git add`` and then ``git commit``::
- 
-        $ git add modified_files
-        $ git commit
+    ``git add`` and then ``git commit``:
+
+    .. prompt:: bash $
+
+        git add modified_files
+        git commit
 
     to record your changes in Git, then push the changes to your GitHub
-    account with::
+    account with:
 
-       $ git push -u origin my_feature
+    .. prompt:: bash $
+
+       git push -u origin my_feature
 
 11. Follow `these
     <https://help.github.com/articles/creating-a-pull-request-from-a-fork>`_
@@ -278,7 +332,9 @@ modifying code and submitting a PR:
 .. note::
 
     If you are modifying a Cython module, you have to re-compile after
-    modifications and before testing them::
+    modifications and before testing them:
+
+    .. prompt:: bash $
 
         pip install --no-build-isolation -e .
 
@@ -286,10 +342,12 @@ modifying code and submitting a PR:
     each time, only the files you have modified.
 
 It is often helpful to keep your local feature branch synchronized with the
-latest changes of the main scikit-learn repository::
+latest changes of the main scikit-learn repository:
+
+.. prompt:: bash $
 
-    $ git fetch upstream
-    $ git merge upstream/master
+    git fetch upstream
+    git merge upstream/main
 
 Subsequently, you might need to solve the conflicts. You can refer to the
 `Git documentation related to resolving merge conflict using the command
@@ -323,9 +381,9 @@ In order to ease the reviewing process,
 complies with the following rules before marking a PR as ``[MRG]``. The
 **bolded** ones are especially important:
 
-1. **Give your pull request a helpful title** that summarises what your
+1. **Give your pull request a helpful title** that summarizes what your
    contribution does. This title will often become the commit message once
-   merged so it should summarise your contribution for posterity. In some
+   merged so it should summarize your contribution for posterity. In some
    cases "Fix <ISSUE TITLE>" is enough. "Fix #<ISSUE NUMBER>" is never a
    good title.
 
@@ -333,10 +391,10 @@ complies with the following rules before
    with `pytest`, but it is usually not recommended since it takes a long
    time. It is often enough to only run the test related to your changes:
    for example, if you changed something in
-   `sklearn/linear_model/logistic.py`, running the following commands will
+   `sklearn/linear_model/_logistic.py`, running the following commands will
    usually be enough:
 
-   - `pytest sklearn/linear_model/logistic.py` to make sure the doctest
+   - `pytest sklearn/linear_model/_logistic.py` to make sure the doctest
      examples are correct
    - `pytest sklearn/linear_model/tests/test_logistic.py` to run the tests
      specific to the file
@@ -364,25 +422,37 @@ complies with the following rules before
    verify the correct behavior of the fix or feature. In this manner, further
    modifications on the code base are granted to be consistent with the
    desired behavior. In the case of bug fixes, at the time of the PR, the
-   non-regression tests should fail for the code base in the master branch
+   non-regression tests should fail for the code base in the ``main`` branch
    and pass for the PR code.
 
-5. **Make sure that your PR does not add PEP8 violations**. To check the
+5. Run `black` to auto-format your code.
+
+   .. prompt:: bash $
+
+        black .
+
+   See black's
+   `editor integration documentation <https://black.readthedocs.io/en/stable/integrations/editors.html>`_
+   to configure your editor to run `black`.
+
+6. **Make sure that your PR does not add PEP8 violations**. To check the
    code that you changed, you can run the following command (see
-   :ref:`above <upstream>` to set up the upstream remote)::
+   :ref:`above <upstream>` to set up the ``upstream`` remote):
+
+   .. prompt:: bash $
 
-        git diff upstream/master -u -- "*.py" | flake8 --diff
+        git diff upstream/main -u -- "*.py" | flake8 --diff
 
    or `make flake8-diff` which should work on unix-like system.
 
-6. Follow the :ref:`coding-guidelines`.
+7. Follow the :ref:`coding-guidelines`.
 
 
-7. When applicable, use the validation tools and scripts in the
+8. When applicable, use the validation tools and scripts in the
    ``sklearn.utils`` submodule.  A list of utility routines available
    for developers can be found in the :ref:`developers-utils` page.
 
-8. Often pull requests resolve one or more other issues (or pull requests).
+9. Often pull requests resolve one or more other issues (or pull requests).
    If merging your pull request means that some other issues/PRs should
    be closed, you should `use keywords to create link to them
    <https://github.com/blog/1506-closing-issues-via-pull-requests/>`_
@@ -392,20 +462,23 @@ complies with the following rules before
    related to some other issues/PRs, create a link to them without using
    the keywords (e.g., ``See also #1234``).
 
-9. PRs should often substantiate the change, through benchmarks of
-   performance and efficiency or through examples of usage. Examples also
-   illustrate the features and intricacies of the library to users. Have a
-   look at other examples in the `examples/
-   <https://github.com/scikit-learn/scikit-learn/tree/master/examples>`_
-   directory for reference. Examples should demonstrate why the new
-   functionality is useful in practice and, if possible, compare it to other
-   methods available in scikit-learn.
-
-10. New features often need to be illustrated with narrative documentation in
-    the user guide, with small code snippets. If relevant, please also add
-    references in the literature, with PDF links when possible.
+10. PRs should often substantiate the change, through benchmarks of
+    performance and efficiency (see :ref:`monitoring_performances`) or through
+    examples of usage. Examples also illustrate the features and intricacies of
+    the library to users. Have a look at other examples in the `examples/
+    <https://github.com/scikit-learn/scikit-learn/tree/main/examples>`_
+    directory for reference. Examples should demonstrate why the new
+    functionality is useful in practice and, if possible, compare it to other
+    methods available in scikit-learn.
+
+11. New features have some maintenance overhead. We expect PR authors
+    to take part in the maintenance for the code they submit, at least
+    initially. New features need to be illustrated with narrative
+    documentation in the user guide, with small code snippets.
+    If relevant, please also add references in the literature, with PDF links
+    when possible.
 
-11. The user guide should also include expected time and space complexity
+12. The user guide should also include expected time and space complexity
     of the algorithm and scalability, e.g. "this algorithm can scale to a
     large number of samples > 100000, but does not scale in dimensionality:
     n_features is expected to be lower than 100".
@@ -416,20 +489,20 @@ will expect.
 You can check for common programming errors with the following tools:
 
 * Code with a good unittest coverage (at least 80%, better 100%), check
-  with::
+  with:
+
+  .. prompt:: bash $
 
-    $ pip install pytest pytest-cov
-    $ pytest --cov sklearn path/to/tests_for_package
+    pip install pytest pytest-cov
+    pytest --cov sklearn path/to/tests_for_package
 
   see also :ref:`testing_coverage`
 
-* A moderate use of type annotations is encouraged but is not mandatory. See
-  [mypy quickstart](https://mypy.readthedocs.io/en/latest/getting_started.html)
-  for an introduction, as well as [pandas contributing documentation](
-  https://pandas.pydata.org/pandas-docs/stable/development/contributing.html#type-hints)
-  for style guidelines. Whether you add type annotation or not::
+  Run static analysis with `mypy`:
 
-    mypy --ignore-missing-import sklearn
+  .. prompt:: bash $
+
+      mypy sklearn
 
   must not produce new errors in your pull request. Using `# type: ignore`
   annotation can be a workaround for a few cases that are not supported by
@@ -439,8 +512,7 @@ You can check for common programming err
   - on properties with decorators
 
 Bonus points for contributions that include a performance analysis with
-a benchmark script and profiling output (please report on the mailing
-list or on the GitHub issue).
+a benchmark script and profiling output (see :ref:`monitoring_performances`).
 
 Also check out the :ref:`performance-howto` guide for more details on
 profiling and Cython optimizations.
@@ -456,7 +528,7 @@ profiling and Cython optimizations.
 
    For two very well documented and more detailed guides on development
    workflow, please pay a visit to the `Scipy Development Workflow
-   <https://docs.scipy.org/doc/numpy/dev/gitwash/development_workflow.html>`_ -
+   <https://docs.scipy.org/doc/scipy/reference/dev/contributor/development_workflow.html>`_ -
    and the `Astropy Workflow for Developers
    <https://astropy.readthedocs.io/en/latest/development/workflow/development_workflow.html>`_
    sections.
@@ -467,7 +539,7 @@ Continuous Integration (CI)
 * Azure pipelines are used for testing scikit-learn on Linux, Mac and Windows,
   with different dependencies and settings.
 * CircleCI is used to build the docs for viewing, for linting with flake8, and
-  for testing with PyPy on Linux
+  for testing with ARM64 / aarch64 on Linux
 
 Please note that if one of the following markers appear in the latest commit
 message, the following actions are taken.
@@ -475,14 +547,23 @@ message, the following actions are taken
     ====================== ===================
     Commit Message Marker  Action Taken by CI
     ---------------------- -------------------
-    [scipy-dev]            Add a Travis build with our dependencies (numpy, scipy, etc ...) development builds
     [ci skip]              CI is skipped completely
+    [cd build]             CD is run (wheels and source distribution are built)
+    [cd build gh]          CD is run only for GitHub Actions
     [lint skip]            Azure pipeline skips linting
+    [scipy-dev]            Build & test with our dependencies (numpy, scipy, etc ...) development builds
+    [icc-build]            Build & test with the Intel C compiler (ICC)
+    [pypy]                 Build & test with PyPy
     [doc skip]             Docs are not built
     [doc quick]            Docs built, but excludes example gallery plots
-    [doc build]            Docs built including example gallery plots
+    [doc build]            Docs built including example gallery plots (very long)
     ====================== ===================
 
+Note that, by default, the documentation is built but only the examples
+that are directly modified by the pull request are executed.
+
+.. _stalled_pull_request:
+
 Stalled pull requests
 ^^^^^^^^^^^^^^^^^^^^^
 
@@ -517,6 +598,35 @@ A good etiquette to take over is:
   new PR to the old one. The new PR should be created by pulling from the
   old one.
 
+Stalled and Unclaimed Issues
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Generally speaking, issues which are up for grabs will have a
+`"help wanted" <https://github.com/scikit-learn/scikit-learn/labels/help%20wanted>`_
+tag. However, not all issues which need contributors will have this tag,
+as the "help wanted" tag is not always up-to-date with the state
+of the issue. Contributors can find issues which are still up for grabs
+using the following guidelines:
+
+* First, to **determine if an issue is claimed**:
+
+  * Check for linked pull requests
+  * Check the conversation to see if anyone has said that they're working on
+    creating a pull request
+
+* If a contributor comments on an issue to say they are working on it,
+  a pull request is expected within 2 weeks (new contributor) or 4 weeks
+  (contributor or core dev), unless a larger time frame is explicitly given.
+  Beyond that time, another contributor can take the issue and make a
+  pull request for it. We encourage contributors to comment directly on the
+  stalled or unclaimed issue to let community members know that they will be
+  working on it.
+
+* If the issue is linked to a :ref:`stalled pull request <stalled_pull_request>`,
+  we recommend that contributors follow the procedure
+  described in the :ref:`stalled_pull_request`
+  section rather than working directly on the issue.
+
 .. _new_contributors:
 
 Issues for New Contributors
@@ -552,7 +662,7 @@ underestimate how easy an issue is to so
     we use the help wanted tag to mark Pull Requests which have been abandoned
     by their original contributor and are available for someone to pick up where the original
     contributor left off. The list of issues with the help wanted tag can be found
-    `here <https://github.com/scikit-learn/scikit-learn/labels/help%20wanted>`__ .
+    `here <https://github.com/scikit-learn/scikit-learn/labels/help%20wanted>`_.
 
     Note that not all issues which need contributors will have this tag.
 
@@ -581,27 +691,38 @@ the development version.
 ..
     packaging is not needed once setuptools starts shipping packaging>=17.0
 
-Building the documentation requires installing some additional packages::
+Building the documentation requires installing some additional packages:
+
+.. prompt:: bash $
 
     pip install sphinx sphinx-gallery numpydoc matplotlib Pillow pandas \
-                scikit-image packaging seaborn
+                scikit-image packaging seaborn sphinx-prompt \
+                sphinxext-opengraph
+
+To build the documentation, you need to be in the ``doc`` folder:
 
-To build the documentation, you need to be in the ``doc`` folder::
+.. prompt:: bash $
 
     cd doc
 
 In the vast majority of cases, you only need to generate the full web site,
-without the example gallery::
+without the example gallery:
+
+.. prompt:: bash $
 
     make
 
 The documentation will be generated in the ``_build/html/stable`` directory.
-To also generate the example gallery you can use::
+To also generate the example gallery you can use:
+
+.. prompt:: bash $
 
     make html
 
 This will run all the examples, which takes a while. If you only want to
-generate a few examples, you can use::
+generate a few examples, you can use:
+
+.. prompt:: bash $
 
     EXAMPLES_PATTERN=your_regex_goes_here make html
 
@@ -610,7 +731,9 @@ This is particularly useful if you are m
 Set the environment variable `NO_MATHJAX=1` if you intend to view
 the documentation in an offline setting.
 
-To build the PDF manual, run::
+To build the PDF manual, run:
+
+.. prompt:: bash $
 
     make latexpdf
 
@@ -650,12 +773,12 @@ opposed to how it works "under the hood"
 
 Finally, follow the formatting rules below to make it consistently good:
 
-* Add "See also" in docstrings for related classes/functions.
+* Add "See Also" in docstrings for related classes/functions.
 
-* "See also" in docstrings should be one line per reference,
+* "See Also" in docstrings should be one line per reference,
   with a colon and an explanation, for example::
 
-    See also
+    See Also
     --------
     SelectKBest : Select features based on the k highest scores.
     SelectFpr : Select features based on a false positive rate test.
@@ -671,7 +794,7 @@ Finally, follow the formatting rules bel
         literal (either `hello` or `goodbye`), a bool, or an int. The default
         value is True.
 
-    array_parameter : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features) or (n_samples,)
+    array_parameter : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples,)
         This parameter accepts data in either of the mentioned forms, with one
         of the mentioned shapes. The default value is
         `np.ones(shape=(n_samples,))`.
@@ -682,35 +805,55 @@ Finally, follow the formatting rules bel
 
     sample_weight : array-like of shape (n_samples,), default=None
 
-In general have the following in mind:
+    multioutput_array : ndarray of shape (n_samples, n_classes) or list of such arrays
+
+  In general have the following in mind:
 
-    1. Use Python basic types. (``bool`` instead of ``boolean``)
-    2. Use parenthesis for defining shapes: ``array-like of shape (n_samples,)``
-       or ``array-like of shape (n_samples, n_features)``
-    3. For strings with multiple options, use brackets:
-       ``input: {'log', 'squared', 'multinomial'}``
-    4. 1D or 2D data can be a subset of
-       ``{array-like, ndarray, sparse matrix, dataframe}``. Note that ``array-like``
-       can also be a ``list``, while ``ndarray`` is explicitly only a ``numpy.ndarray``.
-    5. When specifying the data type of a list, use ``of`` as a delimiter:
-       ``list of int``.
-    6. When specifying the dtype of an ndarray, use e.g. ``dtype=np.int32``
-       after defining the shape:
-       ``ndarray of shape (n_samples,), dtype=np.int32``.
-    7. When the default is ``None``, ``None`` only needs to be specified at the
-       end with ``default=None``. Be sure to include in the docstring, what it
-       means for the parameter or attribute to be ``None``.
+      1. Use Python basic types. (``bool`` instead of ``boolean``)
+      2. Use parenthesis for defining shapes: ``array-like of shape (n_samples,)``
+         or ``array-like of shape (n_samples, n_features)``
+      3. For strings with multiple options, use brackets:
+         ``input: {'log', 'squared', 'multinomial'}``
+      4. 1D or 2D data can be a subset of
+         ``{array-like, ndarray, sparse matrix, dataframe}``. Note that ``array-like``
+         can also be a ``list``, while ``ndarray`` is explicitly only a ``numpy.ndarray``.
+      5. Specify ``dataframe`` when "frame-like" features are being used, such
+         as the column names.
+      6. When specifying the data type of a list, use ``of`` as a delimiter:
+         ``list of int``. When the parameter supports arrays giving details
+         about the shape and/or data type and a list of such arrays, you can
+         use one of ``array-like of shape (n_samples,) or list of such arrays``.
+      7. When specifying the dtype of an ndarray, use e.g. ``dtype=np.int32``
+         after defining the shape:
+         ``ndarray of shape (n_samples,), dtype=np.int32``. You can specify
+         multiple dtypes as a set:
+         ``array-like of shape (n_samples,), dtype={np.float64, np.float32}``.
+         If one wants to mention arbitrary precision, use `integral` and
+         `floating` rather than the Python dtype `int` and `float`. When both
+         `int` and `floating` are supported, there is no need to specify the
+         dtype.
+      8. When the default is ``None``, ``None`` only needs to be specified at the
+         end with ``default=None``. Be sure to include in the docstring, what it
+         means for the parameter or attribute to be ``None``.
 
 * For unwritten formatting rules, try to follow existing good works:
 
+    * When bibliographic references are available with `arxiv <https://arxiv.org/>`_
+      or `Digital Object Identifier <https://www.doi.org/>`_ identification numbers,
+      use the sphinx directives `:arxiv:` or `:doi:`. For example, see references in
+      :ref:`Spectral Clustering Graphs <spectral_clustering_graph>`.
     * For "References" in docstrings, see the Silhouette Coefficient
       (:func:`sklearn.metrics.silhouette_score`).
 
 * When editing reStructuredText (``.rst``) files, try to keep line length under
   80 characters when possible (exceptions include links and tables).
 
-* Before submitting you pull request check if your modifications have introduced
-  new sphinx warnings and try to fix them.
+* Do not modify sphinx labels as this would break existing cross references and
+  external links pointing to specific sections in the
+  scikit-learn documentation.
+
+* Before submitting your pull request check if your modifications have
+  introduced new sphinx warnings and try to fix them.
 
 .. _generated_doc_CI:
 
@@ -774,6 +917,126 @@ To test code coverage, you need to insta
 
 3. Loop.
 
+.. _monitoring_performances:
+
+Monitoring performance
+======================
+
+*This section is heavily inspired from the* `pandas documentation
+<https://pandas.pydata.org/docs/development/contributing.html#running-the-performance-test-suite>`_.
+
+When proposing changes to the existing code base, it's important to make sure
+that they don't introduce performance regressions. Scikit-learn uses
+`asv benchmarks <https://github.com/airspeed-velocity/asv>`_ to monitor the
+performance of a selection of common estimators and functions. You can view
+these benchmarks on the `scikit-learn benchmark page <https://scikit-learn.org/scikit-learn-benchmarks>`_.
+The corresponding benchmark suite can be found in the `scikit-learn/asv_benchmarks` directory.
+
+To use all features of asv, you will need either `conda` or `virtualenv`. For
+more details please check the `asv installation webpage
+<https://asv.readthedocs.io/en/latest/installing.html>`_.
+
+First of all you need to install the development version of asv:
+
+.. prompt:: bash $
+
+    pip install git+https://github.com/airspeed-velocity/asv
+
+and change your directory to `asv_benchmarks/`:
+
+.. prompt:: bash $
+
+  cd asv_benchmarks/
+
+The benchmark suite is configured to run against your local clone of
+scikit-learn. Make sure it is up to date:
+
+.. prompt:: bash $
+
+  git fetch upstream
+
+In the benchmark suite, the benchmarks are organized following the same
+structure as scikit-learn. For example, you can compare the performance of a
+specific estimator between ``upstream/main`` and the branch you are working on:
+
+.. prompt:: bash $
+
+  asv continuous -b LogisticRegression upstream/main HEAD
+
+The command uses conda by default for creating the benchmark environments. If
+you want to use virtualenv instead, use the `-E` flag:
+
+.. prompt:: bash $
+
+  asv continuous -E virtualenv -b LogisticRegression upstream/main HEAD
+
+You can also specify a whole module to benchmark:
+
+.. prompt:: bash $
+
+  asv continuous -b linear_model upstream/main HEAD
+
+You can replace `HEAD` by any local branch. By default it will only report the
+benchmarks that have changed by at least 10%. You can control this ratio with
+the `-f` flag.
+
+To run the full benchmark suite, simply remove the `-b` flag:
+
+.. prompt:: bash $
+
+  asv continuous upstream/main HEAD
+
+However this can take up to two hours. The `-b` flag also accepts a regular
+expression for a more complex subset of benchmarks to run.
+
+To run the benchmarks without comparing to another branch, use the `run`
+command:
+
+.. prompt:: bash $
+
+  asv run -b linear_model HEAD^!
+
+You can also run the benchmark suite using the version of scikit-learn already
+installed in your current Python environment:
+
+.. prompt:: bash $
+
+  asv run --python=same
+
+It's particularly useful when you installed scikit-learn in editable mode to
+avoid creating a new environment each time you run the benchmarks. By default
+the results are not saved when using an existing installation. To save the
+results you must specify a commit hash:
+
+.. prompt:: bash $
+
+  asv run --python=same --set-commit-hash=<commit hash>
+
+Benchmarks are saved and organized by machine, environment and commit. To see
+the list of all saved benchmarks:
+
+.. prompt:: bash $
+
+  asv show
+
+and to see the report of a specific run:
+
+.. prompt:: bash $
+
+  asv show <commit hash>
+
+When running benchmarks for a pull request you're working on please report the
+results on GitHub.
+
+The benchmark suite supports additional configurable options which can be set
+in the `benchmarks/config.json` configuration file. For example, the benchmarks
+can run for a provided list of values for the `n_jobs` parameter.
+
+More information on how to write a benchmark and how to use asv can be found in
+the `asv documentation <https://asv.readthedocs.io/en/latest/index.html>`_.
+
+.. _issue_tracker_tags:
+
 Issue Tracker Tags
 ==================
 
@@ -851,8 +1114,8 @@ use the decorator ``deprecated`` on a pr
 decorator for the docstrings to be rendered properly.
 E.g., renaming an attribute ``labels_`` to ``classes_`` can be done as::
 
-    @deprecated("Attribute labels_ was deprecated in version 0.13 and "
-                "will be removed in 0.15. Use 'classes_' instead")
+    @deprecated("Attribute `labels_` was deprecated in version 0.13 and "
+                "will be removed in 0.15. Use `classes_` instead")
     @property
     def labels_(self):
         return self.classes_
@@ -916,9 +1179,10 @@ Change the default value of a parameter
 
 If the default value of a parameter needs to be changed, please replace the
 default value with a specific value (e.g., ``warn``) and raise
-``FutureWarning`` when users are using the default value. In the following
-example, we change the default value of ``n_clusters`` from 5 to 10
-(current version is 0.20)::
+``FutureWarning`` when users are using the default value. The following
+example assumes that the current version is 0.20 and that we change the
+default value of ``n_clusters`` from 5 (old default for 0.20) to 10
+(new default for 0.22)::
 
     import warnings
 
@@ -944,9 +1208,17 @@ When the change is in a class, we valida
 
 Similar to deprecations, the warning message should always give both the
 version in which the change happened and the version in which the old behavior
-will be removed. The docstring needs to be updated accordingly. We need a test
-which ensures that the warning is raised in relevant cases but not in other
-cases. The warning should be caught in all other tests
+will be removed.
+
+The parameter description in the docstring needs to be updated accordingly by adding
+a `versionchanged` directive with the old and new default value, pointing to the
+version when the change will be effective::
+
+    .. versionchanged:: 0.22
+       The default value for `n_clusters` will change from 5 to 10 in version 0.22.
+
+Finally, we need a test which ensures that the warning is raised in relevant cases but
+not in other cases. The warning should be caught in all other tests
 (using e.g., ``@pytest.mark.filterwarnings``), and there should be no warning
 in the examples.
 
@@ -964,6 +1236,15 @@ use, and so can respond critically about
 each pull request needs to be signed off by two core developers, you can speed
 up this process by providing your feedback.
 
+.. note::
+
+  The difference between an objective improvement and a subjective nit isn't
+  always clear. Reviewers should recall that code review is primarily about
+  reducing risk in the project. When reviewing code, one should aim at
+  preventing situations which may require a bug fix, a deprecation, or a
+  retraction. Regarding docs: typos, grammar issues and disambiguations are
+  better addressed immediately.
+
 Here are a few important aspects that need to be covered in any code review,
 from high-level questions to a more detailed check-list.
 
@@ -1013,11 +1294,43 @@ from high-level questions to a more deta
 
 :ref:`saved_replies` includes some frequent comments that reviewers may make.
 
+.. _communication:
+
+Communication Guidelines
+------------------------
+
+Reviewing open pull requests (PRs) helps move the project forward. It is a
+great way to get familiar with the codebase and should motivate the
+contributor to keep involved in the project. [1]_
+
+- Every PR, good or bad, is an act of generosity. Opening with a positive
+  comment will help the author feel rewarded, and your subsequent remarks may
+  be heard more clearly. You may feel good also.
+- Begin if possible with the large issues, so the author knows they’ve been
+  understood. Resist the temptation to immediately go line by line, or to open
+  with small pervasive issues.
+- Do not let perfect be the enemy of the good. If you find yourself making
+  many small suggestions that don't fall into the :ref:`code_review`, consider
+  the following approaches:
+
+  - refrain from submitting these;
+  - prefix them as "Nit" so that the contributor knows it's OK not to address;
+  - follow up in a subsequent PR; out of courtesy, you may want to let the
+    original contributor know.
+
+- Do not rush, take the time to make your comments clear and justify your
+  suggestions.
+- You are the face of the project. Bad days occur to everyone; on such
+  occasions you deserve a break: try to take your time and stay offline.
+
+.. [1] Adapted from the numpy `communication guidelines
+       <https://numpy.org/devdocs/dev/reviewer_guidelines.html#communication-guidelines>`_.
+
 Reading the existing code base
 ==============================
 
 Reading and digesting an existing code base is always a difficult exercise
-that takes time and experience to master. Even though we try to write simple
+that takes time and experience to master. Even though we try to write simple
 code in general, understanding the code can seem overwhelming at first,
 given the sheer size of the project. Here is a list of tips that may help
 make this task easier and faster (in no particular order).
@@ -1033,17 +1346,17 @@ make this task easier and faster (in no
   is performed, especially at the beginning of the :term:`fit` methods.
   Sometimes, only a very small portion of the code is doing the actual job.
   For example looking at the ``fit()`` method of
-  :class:`sklearn.linear_model.LinearRegression`, what you're looking for
+  :class:`~linear_model.LinearRegression`, what you're looking for
   might just be the call the ``scipy.linalg.lstsq``, but it is buried into
   multiple lines of input checking and the handling of different kinds of
   parameters.
 - Due to the use of `Inheritance
   <https://en.wikipedia.org/wiki/Inheritance_(object-oriented_programming)>`_,
   some methods may be implemented in parent classes. All estimators inherit
-  at least from :class:`BaseEstimator <sklearn.base.BaseEstimator>`, and
-  from a ``Mixin`` class (e.g. :class:`ClassifierMixin
-  <sklearn.base.ClassifierMixin>`) that enables default behaviour depending
-  on the nature of the estimator (classifier, regressor, transformer, etc.).
+  at least from :class:`~base.BaseEstimator`, and
+  from a ``Mixin`` class (e.g. :class:`~base.ClassifierMixin`) that enables default
+  behaviour depending on the nature of the estimator (classifier, regressor,
+  transformer, etc.).
 - Sometimes, reading the tests for a given function will give you an idea of
   what its intended purpose is. You can use ``git grep`` (see below) to find
   all the tests written for a function. Most tests for a specific
@@ -1071,3 +1384,13 @@ make this task easier and faster (in no
     <https://git-scm.com/docs/git-grep#_examples>`_) is also extremely
     useful to see every occurrence of a pattern (e.g. a function call or a
     variable) in the code base.
+
+- Configure `git blame` to ignore the commit that migrated the code style to
+  `black`.
+
+  .. prompt:: bash $
+
+      git config blame.ignoreRevsFile .git-blame-ignore-revs
+
+  Find out more information in black's
+  `documentation for avoiding ruining git blame <https://black.readthedocs.io/en/stable/guides/introducing_black_to_your_project.html#avoiding-ruining-git-blame>`_.
diff -pruN 0.23.2-5/doc/developers/develop.rst 1.1.1-1/doc/developers/develop.rst
--- 0.23.2-5/doc/developers/develop.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/developers/develop.rst	2022-05-19 12:16:26.436782000 +0000
@@ -5,9 +5,9 @@ Developing scikit-learn estimators
 ==================================
 
 Whether you are proposing an estimator for inclusion in scikit-learn,
-developing a separate package compatible with scikit-learn, or 
-implementing custom components for your own projects, this chapter 
-details how to develop objects that safely interact with scikit-learn 
+developing a separate package compatible with scikit-learn, or
+implementing custom components for your own projects, this chapter
+details how to develop objects that safely interact with scikit-learn
 Pipelines and model selection tools.
 
 .. currentmodule:: sklearn
@@ -148,19 +148,19 @@ The ``fit()`` method takes the training
 array in the case of unsupervised learning, or two arrays in the case
 of supervised learning.
 
-Note that the model is fitted using X and y, but the object holds no
-reference to X and y. There are, however, some exceptions to this, as in
+Note that the model is fitted using ``X`` and ``y``, but the object holds no
+reference to ``X`` and ``y``. There are, however, some exceptions to this, as in
 the case of precomputed kernels where this data must be stored for use by
 the predict method.
 
 ============= ======================================================
 Parameters
 ============= ======================================================
-X             array-like, shape (n_samples, n_features)
+X             array-like of shape (n_samples, n_features)
 
-y             array, shape (n_samples,)
+y             array-like of shape (n_samples,)
 
-kwargs        optional data-dependent parameters.
+kwargs        optional data-dependent parameters
 ============= ======================================================
 
 ``X.shape[0]`` should be the same as ``y.shape[0]``. If this requisite
@@ -217,15 +217,6 @@ Optional Arguments
 In iterative algorithms, the number of iterations should be specified by
 an integer called ``n_iter``.
 
-Pairwise Attributes
-^^^^^^^^^^^^^^^^^^^
-
-An estimator that accepts ``X`` of shape ``(n_samples, n_samples)`` and defines
-a :term:`_pairwise` property equal to ``True`` allows for cross-validation of
-the dataset, e.g. when ``X`` is a precomputed kernel matrix. Specifically,
-the :term:`_pairwise` property is used by ``utils.metaestimators._safe_split``
-to slice rows and columns.
-
 Universal attributes
 ^^^^^^^^^^^^^^^^^^^^
 
@@ -315,7 +306,7 @@ the correct interface more easily.
       ...
       ...     def predict(self, X):
       ...
-      ...         # Check is fit had been called
+      ...         # Check if fit has been called
       ...         check_is_fitted(self)
       ...
       ...         # Input validation
@@ -330,11 +321,54 @@ get_params and set_params
 All scikit-learn estimators have ``get_params`` and ``set_params`` functions.
 The ``get_params`` function takes no arguments and returns a dict of the
 ``__init__`` parameters of the estimator, together with their values.
-It must take one keyword argument, ``deep``,
-which receives a boolean value that determines
-whether the method should return the parameters of sub-estimators
-(for most estimators, this can be ignored).
-The default value for ``deep`` should be true.
+
+It must take one keyword argument, ``deep``, which receives a boolean value
+that determines whether the method should return the parameters of
+sub-estimators (for most estimators, this can be ignored). The default value
+for ``deep`` should be `True`. For instance considering the following
+estimator::
+
+    >>> from sklearn.base import BaseEstimator
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> class MyEstimator(BaseEstimator):
+    ...     def __init__(self, subestimator=None, my_extra_param="random"):
+    ...         self.subestimator = subestimator
+    ...         self.my_extra_param = my_extra_param
+
+The parameter `deep` will control whether or not the parameters of the
+`subestimator` should be reported. Thus when `deep=True`, the output will be::
+
+    >>> my_estimator = MyEstimator(subestimator=LogisticRegression())
+    >>> for param, value in my_estimator.get_params(deep=True).items():
+    ...     print(f"{param} -> {value}")
+    my_extra_param -> random
+    subestimator__C -> 1.0
+    subestimator__class_weight -> None
+    subestimator__dual -> False
+    subestimator__fit_intercept -> True
+    subestimator__intercept_scaling -> 1
+    subestimator__l1_ratio -> None
+    subestimator__max_iter -> 100
+    subestimator__multi_class -> auto
+    subestimator__n_jobs -> None
+    subestimator__penalty -> l2
+    subestimator__random_state -> None
+    subestimator__solver -> lbfgs
+    subestimator__tol -> 0.0001
+    subestimator__verbose -> 0
+    subestimator__warm_start -> False
+    subestimator -> LogisticRegression()
+
+Often, the `subestimator` has a name (as e.g. named steps in a
+:class:`~sklearn.pipeline.Pipeline` object), in which case the key should
+become `<name>__C`, `<name>__class_weight`, etc.
+
+While when `deep=False`, the output will be::
+
+    >>> for param, value in my_estimator.get_params(deep=False).items():
+    ...     print(f"{param} -> {value}")
+    my_extra_param -> random
+    subestimator -> LogisticRegression()
 
 The ``set_params`` on the other hand takes as input a dict of the form
 ``'parameter': value`` and sets the parameter of the estimator using this dict.
@@ -463,12 +497,12 @@ Scikit-learn introduced estimator tags i
 of estimators that allow programmatic inspection of their capabilities, such as
 sparse matrix support, supported output types and supported methods. The
 estimator tags are a dictionary returned by the method ``_get_tags()``. These
-tags are used by the common tests and the
-:func:`sklearn.utils.estimator_checks.check_estimator` function to decide what
-tests to run and what input data is appropriate. Tags can depend on estimator
-parameters or even system architecture and can in general only be determined at
-runtime. The default values for the estimator tags are defined in the
-``BaseEstimator`` class.
+tags are used in the common checks run by the
+:func:`~sklearn.utils.estimator_checks.check_estimator` function and the
+:func:`~sklearn.utils.estimator_checks.parametrize_with_checks` decorator.
+Tags determine which checks to run and what input data is appropriate. Tags
+can depend on estimator parameters or even system architecture and can in
+general only be determined at runtime.
 
 The current set of estimator tags are:
 
@@ -496,6 +530,26 @@ no_validation (default=False)
 non_deterministic (default=False)
     whether the estimator is not deterministic given a fixed ``random_state``
 
+pairwise (default=False)
+    This boolean attribute indicates whether the data (`X`) :term:`fit` and
+    similar methods consists of pairwise measures over samples rather than a
+    feature representation for each sample.  It is usually `True` where an
+    estimator has a `metric` or `affinity` or `kernel` parameter with value
+    'precomputed'. Its primary purpose is to support a :term:`meta-estimator`
+    or a cross validation procedure that extracts a sub-sample of data intended
+    for a pairwise estimator, where the data needs to be indexed on both axes.
+    Specifically, this tag is used by
+    :func:`~sklearn.utils.metaestimators._safe_split` to slice rows and
+    columns.
+
+preserves_dtype (default=``[np.float64]``)
+    applies only on transformers. It corresponds to the data types which will
+    be preserved such that `X_trans.dtype` is the same as `X.dtype` after
+    calling `transformer.transform(X)`. If this list is empty, then the
+    transformer is not expected to preserve the data type. The first value in
+    the list is considered as the default data type, corresponding to the data
+    type of the output when the input data type is not going to be preserved.
+
 poor_score (default=False)
     whether the estimator fails to provide a "reasonable" test-set score, which
     currently for regression is an R2 of 0.5 on a subset of the boston housing
@@ -526,9 +580,10 @@ _skip_test (default=False)
 _xfail_checks (default=False)
     dictionary ``{check_name: reason}`` of common checks that will be marked
     as `XFAIL` for pytest, when using
-    :func:`~sklearn.utils.estimator_checks.parametrize_with_checks`. This tag
-    currently has no effect on
-    :func:`~sklearn.utils.estimator_checks.check_estimator`.
+    :func:`~sklearn.utils.estimator_checks.parametrize_with_checks`. These
+    checks will be simply ignored and not run by
+    :func:`~sklearn.utils.estimator_checks.check_estimator`, but a
+    `SkipTestWarning` will be raised.
     Don't use this unless there is a *very good* reason for your estimator
     not to pass the check.
     Also note that the usage of this tag is highly subject to change because
@@ -551,9 +606,10 @@ X_types (default=['2darray'])
     ``'categorical'`` data. For now, the test for sparse data do not make use
     of the ``'sparse'`` tag.
 
-
-To override the tags of a child class, one must define the `_more_tags()`
-method and return a dict with the desired tags, e.g::
+It is unlikely that the default values for each tag will suit the needs of your
+specific estimator. Additional tags can be created or default tags can be
+overridden by defining a `_more_tags()` method which returns a dict with the
+desired overridden tags or new tags. For example::
 
     class MyMultiOutputEstimator(BaseEstimator):
 
@@ -561,11 +617,19 @@ method and return a dict with the desire
             return {'multioutput_only': True,
                     'non_deterministic': True}
 
+Any tag that is not in `_more_tags()` will just fall-back to the default values
+documented above.
+
+Even if it is not recommended, it is possible to override the method
+`_get_tags()`. Note however that **all tags must be present in the dict**. If
+any of the keys documented above is not present in the output of `_get_tags()`,
+an error will occur.
+
 In addition to the tags, estimators also need to declare any non-optional
 parameters to ``__init__`` in the ``_required_parameters`` class attribute,
 which is a list or tuple.  If ``_required_parameters`` is only
 ``["estimator"]`` or ``["base_estimator"]``, then the estimator will be
-instantiated with an instance of ``LinearDiscriminantAnalysis`` (or
+instantiated with an instance of ``LogisticRegression`` (or
 ``RidgeRegression`` if the estimator is a regressor) in the tests. The choice
 of these two models is somewhat idiosyncratic but both should provide robust
 closed-form solutions.
@@ -575,10 +639,10 @@ closed-form solutions.
 Coding guidelines
 =================
 
-The following are some guidelines on how new code should be written for 
-inclusion in scikit-learn, and which may be appropriate to adopt in external 
-projects. Of course, there are special cases and there will be exceptions to 
-these rules. However, following these rules when submitting new code makes 
+The following are some guidelines on how new code should be written for
+inclusion in scikit-learn, and which may be appropriate to adopt in external
+projects. Of course, there are special cases and there will be exceptions to
+these rules. However, following these rules when submitting new code makes
 the review easier so new code can be integrated in less time.
 
 Uniformly formatted code makes it easier to share code ownership. The
@@ -652,21 +716,22 @@ Here's a simple example of code using so
     from sklearn.utils import check_array, check_random_state
 
     def choose_random_sample(X, random_state=0):
-        """
-        Choose a random point from X
+        """Choose a random point from X.
 
         Parameters
         ----------
-        X : array-like, shape (n_samples, n_features)
-            array representing the data
-        random_state : RandomState or an int seed (0 by default)
-            A random number generator instance to define the state of the
-            random permutations generator.
+        X : array-like of shape (n_samples, n_features)
+            An array representing the data.
+        random_state : int or RandomState instance, default=0
+            The seed of the pseudo random number generator that selects a
+            random sample. Pass an int for reproducible output across multiple
+            function calls.
+            See :term:`Glossary <random_state>`.
 
         Returns
         -------
-        x : numpy array, shape (n_features,)
-            A random point selected from X
+        x : ndarray of shape (n_features,)
+            A random point selected from X.
         """
         X = check_array(X)
         random_state = check_random_state(random_state)
@@ -695,6 +760,7 @@ The following example should make this c
 
         def __init__(self, n_components=100, random_state=None):
             self.random_state = random_state
+            self.n_components = n_components
 
         # the arguments are ignored anyway, so we make them optional
         def fit(self, X=None, y=None):
@@ -702,9 +768,25 @@ The following example should make this c
 
         def transform(self, X):
             n_samples = X.shape[0]
-            return self.random_state_.randn(n_samples, n_components)
+            return self.random_state_.randn(n_samples, self.n_components)
 
 The reason for this setup is reproducibility:
 when an estimator is ``fit`` twice to the same data,
 it should produce an identical model both times,
 hence the validation in ``fit``, not ``__init__``.
+
+Numerical assertions in tests
+-----------------------------
+
+When asserting the quasi-equality of arrays of continuous values,
+do use :func:`sklearn.utils._testing.assert_allclose`.
+
+The relative tolerance is automatically inferred from the provided arrays
+dtypes (for float32 and float64 dtypes in particular) but you can override
+via ``rtol``.
+
+When comparing arrays of zero-elements, please do provide a non-zero value for
+the absolute tolerance via ``atol``.
+
+For more information, please refer to the docstring of
+:func:`sklearn.utils._testing.assert_allclose`.
diff -pruN 0.23.2-5/doc/developers/index.rst 1.1.1-1/doc/developers/index.rst
--- 0.23.2-5/doc/developers/index.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/developers/index.rst	2022-05-19 12:16:26.436782000 +0000
@@ -1,6 +1,6 @@
-.. Places global toc into the sidebar
+.. Places parent toc into the sidebar
 
-:globalsidebartoc: True
+:parenttoc: True
 
 .. _developers_guide:
 
@@ -14,10 +14,12 @@ Developer's Guide
 .. toctree::
 
    contributing
+   minimal_reproducer
    develop
    tips
    utilities
    performance
    advanced_installation
+   bug_triaging
    maintainer
    plotting
diff -pruN 0.23.2-5/doc/developers/maintainer.rst 1.1.1-1/doc/developers/maintainer.rst
--- 0.23.2-5/doc/developers/maintainer.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/developers/maintainer.rst	2022-05-19 12:16:26.436782000 +0000
@@ -15,9 +15,11 @@ post, and minor releases.
 Before a release
 ................
 
-1. Update authors table::
+1. Update authors table:
 
-    $ cd build_tools; make authors; cd ..
+   .. prompt:: bash $
+
+       cd build_tools; make authors; cd ..
 
    and commit. This is only needed if the authors have changed since the last
    release. This step is sometimes done independent of the release. This
@@ -31,7 +33,7 @@ Before a release
 
    - ``maint_tools/sort_whats_new.py`` can put what's new entries into
      sections. It's not perfect, and requires manual checking of the changes.
-     If the whats new list is well curated, it may not be necessary.
+     If the what's new list is well curated, it may not be necessary.
 
    - The ``maint_tools/whats_missing.sh`` script may be used to identify pull
      requests that were merged but likely missing from What's New.
@@ -46,174 +48,266 @@ permissions given to maintainers, which
 
 - *maintainer* role on ``scikit-learn`` projects on ``pypi.org`` and
   ``test.pypi.org``, separately.
-- become a member of the *scikit-learn* team on conda-forge by editing the 
-  ``recipe/meta.yaml`` file on 
+- become a member of the *scikit-learn* team on conda-forge by editing the
+  ``recipe/meta.yaml`` file on
   ``https://github.com/conda-forge/scikit-learn-feedstock``
-- *maintainer* on ``https://github.com/MacPython/scikit-learn-wheels``
-
 
 .. _preparing_a_release_pr:
 
 Preparing a release PR
 ......................
 
-Releasing the first RC of e.g. version `0.99` involves creating the release
+Major version release
+~~~~~~~~~~~~~~~~~~~~~
+
+Prior to branching please do not forget to prepare a Release Highlights page as
+a runnable example and check that its HTML rendering looks correct. These
+release highlights should be linked from the ``doc/whats_new/v0.99.rst`` file
+for the new version of scikit-learn.
+
+Releasing the first RC of e.g. version `0.99.0` involves creating the release
 branch `0.99.X` directly on the main repo, where `X` really is the letter X,
-**not a placeholder**. This is considered the *feature freeze*. The
-development for the major and minor releases of 0.99 should
-**also** happen under `0.99.X`. Each release (rc, major, or minor) is a tag
-under that branch.
+**not a placeholder**. The development for the major and minor releases of `0.99`
+should **also** happen under `0.99.X`. Each release (rc, major, or minor) is a
+tag under that branch.
+
+This is done only once, as the major and minor releases happen on the same
+branch:
+
+   .. prompt:: bash $
+
+     # Assuming upstream is an alias for the main scikit-learn repo:
+     git fetch upstream main
+     git checkout upstream/main
+     git checkout -b 0.99.X
+     git push --set-upstream upstream 0.99.X
+
+   Again, `X` is literal here, and `99` is replaced by the release number.
+   The branches are called ``0.19.X``, ``0.20.X``, etc.
 
 In terms of including changes, the first RC ideally counts as a *feature
 freeze*. Each coming release candidate and the final release afterwards will
-include minor documentation changes and bug fixes. Any major enhancement or
-feature should be excluded.
+include only minor documentation changes and bug fixes. Any major enhancement
+or feature should be excluded.
+
+Then you can prepare a local branch for the release itself, for instance:
+``release-0.99.0rc1``, push it to your github fork and open a PR **to the**
+`scikit-learn/0.99.X` **branch**. Copy the :ref:`release_checklist` templates
+in the description of the Pull Request to track progress.
+
+This PR will be used to push commits related to the release as explained in
+:ref:`making_a_release`.
+
+You can also create a second PR from main and targeting main to increment
+the ``__version__`` variable in `sklearn/__init__.py` to the next dev
+version. This means while we're in the release candidate period, the latest
+stable is two versions behind the main branch, instead of one. In this PR
+targeting main you should also include a new file for the matching version
+under the ``doc/whats_new/`` folder so PRs that target the next version can
+contribute their changelog entries to this file in parallel to the release
+process.
+
+Minor version release
+~~~~~~~~~~~~~~~~~~~~~
 
 The minor releases should include bug fixes and some relevant documentation
 changes only. Any PR resulting in a behavior change which is not a bug fix
 should be excluded.
 
-First, create a branch, **on your own fork** (to release e.g. `0.999.3`)::
+First, create a branch, **on your own fork** (to release e.g. `0.99.3`):
 
-    $ # assuming master and upstream/master are the same
-    $ git checkout -b release-0.999.3 master
+.. prompt:: bash $
 
-Then, create a PR **to the** `scikit-learn/0.999.X` **branch** (not to
-master!) with all the desired changes::
+    # assuming main and upstream/main are the same
+    git checkout -b release-0.99.3 main
 
-	$ git rebase -i upstream/0.999.2
+Then, create a PR **to the** `scikit-learn/0.99.X` **branch** (not to
+main!) with all the desired changes:
 
-Do not forget to add a commit updating sklearn.__version__.
+.. prompt:: bash $
+
+	git rebase -i upstream/0.99.2
+
+Copy the :ref:`release_checklist` templates in the description of the Pull
+Request to track progress.
+
+Do not forget to add a commit updating ``sklearn.__version__``.
 
 It's nice to have a copy of the ``git rebase -i`` log in the PR to help others
 understand what's included.
 
+.. _making_a_release:
+
 Making a release
 ................
 
-0. Create the release branch on the main repo, if it does not exist. This is
-   done only once, as the major and minor releases happen on the same branch::
-
-     $ git checkout -b 0.99.X
-
-   Again, `X` is literal here, and `99` is replaced by the release number.
-   The branches are called ``0.19.X``, ``0.20.X``, etc.
+0. Ensure that you have checked out the branch of the release PR as explained
+   in :ref:`preparing_a_release_pr` above.
 
 1. Update docs. Note that this is for the final release, not necessarily for
-   the RC releases. These changes should be made in master and cherry-picked
+   the RC releases. These changes should be made in main and cherry-picked
    into the release branch, only before the final release.
 
-   - Edit the doc/whats_new.rst file to add release title and commit
-     statistics. You can retrieve commit statistics with::
+   - Edit the ``doc/whats_new/v0.99.rst`` file to add release title and list of
+     contributors.
+     You can retrieve the list of contributor names with:
+
+     ::
 
-        $ git shortlog -s 0.99.33.. | cut -f2- | sort --ignore-case | tr '\n' ';' | sed 's/;/, /g;s/, $//'
+       $ git shortlog -s 0.98.33.. | cut -f2- | sort --ignore-case | tr '\n' ';' | sed 's/;/, /g;s/, $//' | fold -s
+
+     - For major releases, link the release highlights example from the ``doc/whats_new/v0.99.rst`` file.
 
    - Update the release date in ``whats_new.rst``
 
-   - Edit the doc/templates/index.html to change the 'News' entry of the front
-     page.
+   - Edit the ``doc/templates/index.html`` to change the 'News' entry of the
+     front page (with the release month as well).
 
 2. On the branch for releasing, update the version number in
-   `sklearn/__init__.py`, the ``__version__`` variable by removing ``dev*``
-   only when ready to release. On master, increment the version in the same
-   place (when branching for release). This means while we're in the release
-   candidate period, the latest stable is two versions behind the master
-   branch, instead of one.
+   ``sklearn/__init__.py``, the ``__version__``.
+
+   For major releases, please add a 0 at the end: `0.99.0` instead of `0.99`.
+
+   For the first release candidate, use the `rc1` suffix on the expected final
+   release number: `0.99.0rc1`.
+
+3. Trigger the wheel builder with the ``[cd build]`` commit marker using
+   the command:
+
+   .. prompt:: bash $
 
-3. At this point all relevant PRs should have been merged into the `0.99.X`
-   branch. Create the source tarball:
+    git commit --allow-empty -m "Trigger wheel builder workflow: [cd build]"
 
-   - Wipe clean your repo::
+   The wheel building workflow is managed by GitHub Actions and the results can be browsed at:
+   https://github.com/scikit-learn/scikit-learn/actions?query=workflow%3A%22Wheel+builder%22
 
-       $ git clean -xfd
+.. note::
 
-   - Generate the tarball::
+  Before building the wheels, make sure that the ``pyproject.toml`` file is
+  up to date and using the oldest version of ``numpy`` for each Python version
+  to avoid `ABI <https://en.wikipedia.org/wiki/Application_binary_interface>`_
+  incompatibility issues. Moreover, a new line has to be included in the
+  ``pyproject.toml`` file for each new supported version of Python.
 
-       $ python setup.py sdist
+.. note::
 
-   - You can also test a binary dist build using::
+  The acronym CD in `[cd build]` stands for `Continuous Delivery
+  <https://en.wikipedia.org/wiki/Continuous_delivery>`_ and refers to the
+  automation used to generate the release artifacts (binary and source
+  packages). This can be seen as an extension to CI which stands for
+  `Continuous Integration
+  <https://en.wikipedia.org/wiki/Continuous_integration>`_. The CD workflow on
+  GitHub Actions is also used to automatically create nightly builds and
+  publish packages for the development branch of scikit-learn. See
+  :ref:`install_nightly_builds`.
 
-       $ python setup.py bdist_wheel
+4. Once all the CD jobs have completed successfully in the PR, merge it,
+   again with the `[cd build]` marker in the commit message. This time
+   the results will be uploaded to the staging area.
 
-   - You can test if PyPi is going to accept the package using::
+   You should then be able to upload the generated artifacts (.tar.gz and .whl
+   files) to https://test.pypi.org using the "Run workflow" form for the
+   following GitHub Actions workflow:
 
-       $ twine check dist/*
+   https://github.com/scikit-learn/scikit-learn/actions?query=workflow%3A%22Publish+to+Pypi%22
 
-   You can run ``twine check`` after step 5 (fetching artifacts) as well.
+5. If this went fine, you can proceed with tagging. Proceed with caution.
+   Ideally, tags should be created when you're almost certain that the release
+   is ready, since adding a tag to the main repo can trigger certain automated
+   processes.
 
-   The result should be in the `dist/` folder. We will upload it later
-   with the wheels. Check that you can install it in a new virtualenv and
-   that the tests pass.
+   Create the tag and push it (if it's an RC, it can be ``0.xx.0rc1`` for
+   instance):
 
-4. Proceed with caution. Ideally, tags should be created when you're almost
-   certain that the release is ready, since adding a tag to the main repo can
-   trigger certain automated processes. You can test upload the ``sdist`` to
-   ``test.pypi.org``, and test the next step by setting ``BUILD_COMMIT`` to the
-   branch name (``0.99.X`` for instance) in a PR to the wheel building repo.
-   Once all works, you can proceed with tagging. Create the tag and push it (if
-   it's an RC, it can be ``0.xxrc1`` for instance)::
+   .. prompt:: bash $
 
-    $ git tag -a 0.99  # in the 0.99.X branch
+     git tag -a 0.99.0  # in the 0.99.X branch
+     git push git@github.com:scikit-learn/scikit-learn.git 0.99.0
 
-    $ git push git@github.com:scikit-learn/scikit-learn.git 0.99
+6. Confirm that the bot has detected the tag on the conda-forge feedstock repo:
+   https://github.com/conda-forge/scikit-learn-feedstock. If not, submit a PR for the
+   release. If you want to publish an RC release on conda-forge, the PR should target
+   the `rc` branch as opposed to the `main` branch. The two branches need to be
+   kept in sync otherwise.
 
-5. Update the dependency versions and set ``BUILD_COMMIT`` variable to the
-   release tag at:
+7. Trigger the GitHub Actions workflow again but this time to upload the artifacts
+   to the real https://pypi.org (replace "testpypi" by "pypi" in the "Run
+   workflow" form).
 
-   https://github.com/MacPython/scikit-learn-wheels
+8. **Alternative to step 7**: it's possible to collect locally the generated binary
+   wheel packages and source tarball and upload them all to PyPI by running the
+   following commands in the scikit-learn source folder (checked out at the
+   release tag):
 
-   Once the CI has completed successfully, collect the generated binary wheel
-   packages and upload them to PyPI by running the following commands in the
-   scikit-learn source folder (checked out at the release tag)::
+   .. prompt:: bash $
 
-       $ rm -r dist # only if there's anything other than the sdist tar.gz there
-       $ pip install -U wheelhouse_uploader twine
-       $ python setup.py fetch_artifacts
+       rm -r dist
+       pip install -U wheelhouse_uploader twine
+       python -m wheelhouse_uploader fetch \
+         --version 0.99.0 \
+         --local-folder dist \
+         scikit-learn \
+         https://pypi.anaconda.org/scikit-learn-wheels-staging/simple/scikit-learn/
 
-6. Check the content of the `dist/` folder: it should contain all the wheels
+   This command will download all the binary packages accumulated in the
+   `staging area on the anaconda.org hosting service
+   <https://anaconda.org/scikit-learn-wheels-staging/scikit-learn/files>`_ and
+   put them in your local `./dist` folder.
+
+   Check the content of the `./dist` folder: it should contain all the wheels
    along with the source tarball ("scikit-learn-RRR.tar.gz").
 
    Make sure that you do not have developer versions or older versions of
    the scikit-learn package in that folder.
 
-   Before uploading to pypi, you can test upload to test.pypi.org::
+   Before uploading to pypi, you can test upload to test.pypi.org:
+
+   .. prompt:: bash $
+
+       twine upload --verbose --repository-url https://test.pypi.org/legacy/ dist/*
+
+   Upload everything at once to https://pypi.org:
 
-       $ twine upload --verbose --repository-url https://test.pypi.org/legacy/ dist/*
+   .. prompt:: bash $
 
-   Upload everything at once to https://pypi.org::
+       twine upload dist/*
 
-       $ twine upload dist/*
+9. For major/minor (not bug-fix release or release candidates), update the symlink for
+   ``stable`` and the ``latestStable`` variable in
+   https://github.com/scikit-learn/scikit-learn.github.io:
 
-7. For major/minor (not bug-fix release), update the symlink for ``stable``
-   and the ``latestStable`` variable in
-   https://github.com/scikit-learn/scikit-learn.github.io::
-
-       $ cd /tmp
-       $ git clone --depth 1 --no-checkout git@github.com:scikit-learn/scikit-learn.github.io.git
-       $ cd scikit-learn.github.io
-       $ echo stable > .git/info/sparse-checkout
-       $ git checkout master
-       $ rm stable
-       $ ln -s 0.999 stable
-       $ sed -i "s/latestStable = '.*/latestStable = '0.999';/" versionwarning.js
-       $ git add stable/ versionwarning.js
-       $ git commit -m "Update stable to point to 0.999"
-       $ git push origin master
+   .. prompt:: bash $
+
+       cd /tmp
+       git clone --depth 1 --no-checkout git@github.com:scikit-learn/scikit-learn.github.io.git
+       cd scikit-learn.github.io
+       echo stable > .git/info/sparse-checkout
+       git checkout main
+       rm stable
+       ln -s 0.999 stable
+       sed -i "s/latestStable = '.*/latestStable = '0.999';/" versionwarning.js
+       git add stable versionwarning.js
+       git commit -m "Update stable to point to 0.999"
+       git push origin main
+
+.. _release_checklist:
+
+Release checklist
+.................
 
 The following GitHub checklist might be helpful in a release PR::
 
-    * [ ] update news and what's new date in master and release branch
-    * [ ] create tag
-    * [ ] update dependencies and release tag at
-      https://github.com/MacPython/scikit-learn-wheels
-    * [ ] twine the wheels to PyPI when that's green
-    * [ ] https://github.com/scikit-learn/scikit-learn/releases draft
+    * [ ] update news and what's new date in release branch
+    * [ ] update news and what's new date and sklearn dev0 version in main branch
+    * [ ] check that the wheels for the release can be built successfully
+    * [ ] merge the PR with `[cd build]` commit message to upload wheels to the staging repo
+    * [ ] upload the wheels and source tarball to https://test.pypi.org
+    * [ ] create tag on the main github repo
     * [ ] confirm bot detected at
       https://github.com/conda-forge/scikit-learn-feedstock and wait for merge
-    * [ ] https://github.com/scikit-learn/scikit-learn/releases publish
-    * [ ] fix the binder release version in ``.binder/requirement.txt`` (see
-      #15847)
-    * [ ] announce on mailing list and on twitter
+    * [ ] upload the wheels and source tarball to PyPI
+    * [ ] https://github.com/scikit-learn/scikit-learn/releases publish (except for RC)
+    * [ ] announce on mailing list and on Twitter, and LinkedIn
 
 Merging Pull Requests
 ---------------------
@@ -276,10 +370,17 @@ deprecation cycle.
 
 To create an experimental module, you can just copy and modify the content of
 `enable_hist_gradient_boosting.py
-<https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/experimental/enable_hist_gradient_boosting.py>`_,
+<https://github.com/scikit-learn/scikit-learn/blob/c9c89cfc85dd8dfefd7921c16c87327d03140a06/sklearn/experimental/enable_hist_gradient_boosting.py>`__,
 or
 `enable_iterative_imputer.py
-<https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/experimental/enable_iterative_imputer.py>`_.
+<https://github.com/scikit-learn/scikit-learn/blob/c9c89cfc85dd8dfefd7921c16c87327d03140a06/sklearn/experimental/enable_iterative_imputer.py>`_.
+
+.. note::
+
+  These are permalinks as in 0.24, where these estimators are still
+  experimental. They might be stable at the time of reading - hence the
+  permalink. See below for instructions on the transition from experimental
+  to stable.
 
 Note that the public import path must be to a public subpackage (like
 ``sklearn/ensemble`` or ``sklearn/impute``), not just a ``.py`` module.
@@ -287,19 +388,20 @@ Also, the (private) experimental feature
 submodule/subpackage of the public subpackage, e.g.
 ``sklearn/ensemble/_hist_gradient_boosting/`` or
 ``sklearn/impute/_iterative.py``. This is needed so that pickles still work
-in the future when the features aren't experimental anymore
+in the future when the features aren't experimental anymore.
 
-To avoid type checker (e.g. mypy) errors a direct import of experimenal
+To avoid type checker (e.g. mypy) errors a direct import of experimental
 estimators should be done in the parent module, protected by the
 ``if typing.TYPE_CHECKING`` check. See `sklearn/ensemble/__init__.py
-<https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/ensemble/__init__.py>`_,
+<https://github.com/scikit-learn/scikit-learn/blob/c9c89cfc85dd8dfefd7921c16c87327d03140a06/sklearn/ensemble/__init__.py>`_,
 or `sklearn/impute/__init__.py
-<https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/impute/__init__.py>`_
+<https://github.com/scikit-learn/scikit-learn/blob/c9c89cfc85dd8dfefd7921c16c87327d03140a06/sklearn/impute/__init__.py>`_
 for an example.
 
 Please also write basic tests following those in
 `test_enable_hist_gradient_boosting.py
-<https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/experimental/tests/test_enable_hist_gradient_boosting.py>`_.
+<https://github.com/scikit-learn/scikit-learn/blob/c9c89cfc85dd8dfefd7921c16c87327d03140a06/sklearn/experimental/tests/test_enable_hist_gradient_boosting.py>`__.
+
 
 Make sure every user-facing code you write explicitly mentions that the feature
 is experimental, and add a ``# noqa`` comment to avoid pep8-related warnings::
@@ -315,3 +417,14 @@ sklearn.experimental import *`` **does n
 
 Note that some experimental classes / functions are not included in the
 :mod:`sklearn.experimental` module: ``sklearn.datasets.fetch_openml``.
+
+Once the feature becomes stable, remove all `enable_my_experimental_feature`
+in the scikit-learn code (even feature highlights etc.) and make the
+`enable_my_experimental_feature` a no-op that just raises a warning:
+`enable_hist_gradient_boosting.py
+<https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/experimental/enable_hist_gradient_boosting.py>`__.
+The file should stay there indefinitely as we don't want to break users' code:
+we just incentivize them to remove that import with the warning.
+
+Also update the tests accordingly: `test_enable_hist_gradient_boosting.py
+<https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/experimental/tests/test_enable_hist_gradient_boosting.py>`__.
diff -pruN 0.23.2-5/doc/developers/minimal_reproducer.rst 1.1.1-1/doc/developers/minimal_reproducer.rst
--- 0.23.2-5/doc/developers/minimal_reproducer.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/developers/minimal_reproducer.rst	2022-05-19 12:16:26.436782000 +0000
@@ -0,0 +1,434 @@
+.. _minimal_reproducer:
+
+==============================================
+Crafting a minimal reproducer for scikit-learn
+==============================================
+
+
+Whether submitting a bug report, designing a suite of tests, or simply posting a
+question in the discussions, being able to craft minimal, reproducible examples
+(or minimal, workable examples) is the key to communicating effectively and
+efficiently with the community.
+
+There are very good guidelines on the internet such as `this StackOverflow
+document <https://stackoverflow.com/help/mcve>`_ or `this blogpost by Matthew
+Rocklin <https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports>`_
+on crafting Minimal Complete Verifiable Examples (referred below as MCVE).
+Our goal is not to be repetitive with those references but rather to provide a
+step-by-step guide on how to narrow down a bug until you have reached the
+shortest possible code to reproduce it.
+
+The first step before submitting a bug report to scikit-learn is to read the
+`Issue template
+<https://github.com/scikit-learn/scikit-learn/blob/main/.github/ISSUE_TEMPLATE/bug_report.yml>`_.
+It is already quite informative about the information you will be asked to
+provide.
+
+
+.. _good_practices:
+
+Good practices
+==============
+
+In this section we will focus on the **Steps/Code to Reproduce** section of the
+`Issue template
+<https://github.com/scikit-learn/scikit-learn/blob/main/.github/ISSUE_TEMPLATE/bug_report.yml>`_.
+We will start with a snippet of code that already provides a failing example but
+that has room for readability improvement. We then craft a MCVE from it.
+
+**Example**
+
+.. code-block:: python
+
+    # I am currently working in a ML project and when I tried to fit a
+    # GradientBoostingRegressor instance to my_data.csv I get a UserWarning:
+    # "X has feature names, but DecisionTreeRegressor was fitted without
+    # feature names". You can get a copy of my dataset from
+    # https://example.com/my_data.csv and verify my features do have
+    # names. The problem seems to arise during fit when I pass an integer
+    # to the n_iter_no_change parameter.
+
+    df = pd.read_csv('my_data.csv')
+    X = df[["feature_name"]] # my features do have names
+    y = df["target"]
+
+    # We set random_state=42 for the train_test_split
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.33, random_state=42
+    )
+
+    scaler = StandardScaler(with_mean=False)
+    X_train = scaler.fit_transform(X_train)
+    X_test = scaler.transform(X_test)
+
+    # An instance with default n_iter_no_change raises no error nor warnings
+    gbdt = GradientBoostingRegressor(random_state=0)
+    gbdt.fit(X_train, y_train)
+    default_score = gbdt.score(X_test, y_test)
+
+    # the bug appears when I change the value for n_iter_no_change
+    gbdt = GradientBoostingRegressor(random_state=0, n_iter_no_change=5)
+    gbdt.fit(X_train, y_train)
+    other_score = gbdt.score(X_test, y_test)
+
+    other_score = gbdt.score(X_test, y_test)
+
+
+Provide a failing code example with minimal comments
+----------------------------------------------------
+
+Writing instructions to reproduce the problem in English is often ambiguous.
+Better make sure that all the necessary details to reproduce the problem are
+illustrated in the Python code snippet to avoid any ambiguity. Besides, by this
+point you already provided a concise description in the **Describe the bug**
+section of the `Issue template
+<https://github.com/scikit-learn/scikit-learn/blob/main/.github/ISSUE_TEMPLATE/bug_report.yml>`_.
+
+The following code, while **still not minimal**, is already **much better**
+because it can be copy-pasted in a Python terminal to reproduce the problem in
+one step. In particular:
+
+    - it contains **all necessary import statements**;
+    - it can fetch the public dataset without having to manually download a
+      file and put it in the expected location on the disk.
+
+**Improved example**
+
+.. code-block:: python
+
+    import pandas as pd
+
+    df = pd.read_csv("https://example.com/my_data.csv")
+    X = df[["feature_name"]]
+    y = df["target"]
+
+    from sklearn.model_selection import train_test_split
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.33, random_state=42
+    )
+
+    from sklearn.preprocessing import StandardScaler
+
+    scaler = StandardScaler(with_mean=False)
+    X_train = scaler.fit_transform(X_train)
+    X_test = scaler.transform(X_test)
+
+    from sklearn.ensemble import GradientBoostingRegressor
+
+    gbdt = GradientBoostingRegressor(random_state=0)
+    gbdt.fit(X_train, y_train)  # no warning
+    default_score = gbdt.score(X_test, y_test)
+
+    gbdt = GradientBoostingRegressor(random_state=0, n_iter_no_change=5)
+    gbdt.fit(X_train, y_train)  # raises warning
+    other_score = gbdt.score(X_test, y_test)
+    other_score = gbdt.score(X_test, y_test)
+
+
+Boil down your script to something as small as possible
+-------------------------------------------------------
+
+You have to ask yourself which lines of code are relevant and which are not for
+reproducing the bug. Deleting unnecessary lines of code or simplifying the
+function calls by omitting unrelated non-default options will help you and other
+contributors narrow down the cause of the bug.
+
+In particular, for this specific example:
+
+- the warning has nothing to do with the `train_test_split` since it already
+  appears in the training step, before we use the test set.
+- similarly, the lines that compute the scores on the test set are not
+  necessary;
+- the bug can be reproduced for any value of `random_state` so leave it to its
+  default;
+- the bug can be reproduced without preprocessing the data with the
+  `StandardScaler`.
+
+**Improved example**
+
+.. code-block:: python
+
+    import pandas as pd
+    df = pd.read_csv("https://example.com/my_data.csv")
+    X = df[["feature_name"]]
+    y = df["target"]
+
+    from sklearn.ensemble import GradientBoostingRegressor
+
+    gbdt = GradientBoostingRegressor()
+    gbdt.fit(X, y)  # no warning
+
+    gbdt = GradientBoostingRegressor(n_iter_no_change=5)
+    gbdt.fit(X, y)  # raises warning
+
+
+**DO NOT** report your data unless it is extremely necessary
+------------------------------------------------------------
+
+The idea is to make the code as self-contained as possible. For doing so, you
+can use a :ref:`synth_data`. It can be generated using numpy, pandas or the
+:mod:`sklearn.datasets` module. Most of the time the bug is not related to a
+particular structure of your data. Even if it is, try to find an available
+dataset that has similar characteristics to yours and that reproduces the
+problem. In this particular case, we are interested in data that has labeled
+feature names.
+
+**Improved example**
+
+.. code-block:: python
+
+    import pandas as pd
+    from sklearn.ensemble import GradientBoostingRegressor
+
+    df = pd.DataFrame(
+        {
+            "feature_name": [-12.32, 1.43, 30.01, 22.17],
+            "target": [72, 55, 32, 43],
+        }
+    )
+    X = df[["feature_name"]]
+    y = df["target"]
+
+    gbdt = GradientBoostingRegressor()
+    gbdt.fit(X, y) # no warning
+    gbdt = GradientBoostingRegressor(n_iter_no_change=5)
+    gbdt.fit(X, y) # raises warning
+
+As already mentioned, the key to communication is the readability of the code
+and good formatting can really be a plus. Notice that in the previous snippet
+we:
+
+    - try to limit all lines to a maximum of 79 characters to avoid horizontal
+      scrollbars in the code snippets blocks rendered on the GitHub issue;
+    - use blank lines to separate groups of related functions;
+    - place all the imports in their own group at the beginning.
+
+The simplification steps presented in this guide can be implemented in a
+different order than the progression we have shown here. The important points
+are:
+
+    - a minimal reproducer should be runnable by a simple copy-and-paste in a
+      python terminal;
+    - it should be simplified as much as possible by removing any code steps
+      that are not strictly needed to reproduce the original problem;
+    - it should ideally only rely on a minimal dataset generated on-the-fly by
+      running the code instead of relying on external data, if possible.
+
+
+Use markdown formatting
+-----------------------
+
+To format code or text into its own distinct block, use triple backticks.
+`Markdown
+<https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax>`_
+supports an optional language identifier to enable syntax highlighting in your
+fenced code block. For example::
+
+    ```python
+    from sklearn.datasets import make_blobs
+
+    n_samples = 100
+    n_components = 3
+    X, y = make_blobs(n_samples=n_samples, centers=n_components)
+    ```
+
+will render a python formatted snippet as follows
+
+.. code-block:: python
+
+    from sklearn.datasets import make_blobs
+
+    n_samples = 100
+    n_components = 3
+    X, y = make_blobs(n_samples=n_samples, centers=n_components)
+
+It is not necessary to create several blocks of code when submitting a bug
+report. Remember other reviewers are going to copy-paste your code and having a
+single cell will make their task easier.
+
+In the section named **Actual results** of the `Issue template
+<https://github.com/scikit-learn/scikit-learn/blob/main/.github/ISSUE_TEMPLATE/bug_report.yml>`_
+you are asked to provide the error message including the full traceback of the
+exception. In this case, use the `python-traceback` qualifier. For example::
+
+    ```python-traceback
+    ---------------------------------------------------------------------------
+    TypeError                                 Traceback (most recent call last)
+    <ipython-input-1-a674e682c281> in <module>
+        4 vectorizer = CountVectorizer(input=docs, analyzer='word')
+        5 lda_features = vectorizer.fit_transform(docs)
+    ----> 6 lda_model = LatentDirichletAllocation(
+        7     n_topics=10,
+        8     learning_method='online',
+
+    TypeError: __init__() got an unexpected keyword argument 'n_topics'
+    ```
+
+yields the following when rendered:
+
+.. code-block:: python
+
+    ---------------------------------------------------------------------------
+    TypeError                                 Traceback (most recent call last)
+    <ipython-input-1-a674e682c281> in <module>
+        4 vectorizer = CountVectorizer(input=docs, analyzer='word')
+        5 lda_features = vectorizer.fit_transform(docs)
+    ----> 6 lda_model = LatentDirichletAllocation(
+        7     n_topics=10,
+        8     learning_method='online',
+
+    TypeError: __init__() got an unexpected keyword argument 'n_topics'
+
+
+.. _synth_data:
+
+Synthetic dataset
+=================
+
+Before choosing a particular synthetic dataset, first you have to identify the
+type of problem you are solving: Is it a classification, a regression,
+a clustering, etc?
+
+Once you have narrowed down the type of problem, you need to provide a synthetic
+dataset accordingly. Most of the time you only need a minimalistic dataset.
+Here is a non-exhaustive list of tools that may help you.
+
+NumPy
+-----
+
+NumPy tools such as `numpy.random.randn
+<https://numpy.org/doc/stable/reference/random/generated/numpy.random.randn.html>`_
+and `numpy.random.randint
+<https://numpy.org/doc/stable/reference/random/generated/numpy.random.randint.html>`_
+can be used to create dummy numeric data.
+
+- regression
+
+    Regressions take continuous numeric data as features and target.
+
+    .. code-block:: python
+
+        import numpy as np
+
+        rng = np.random.RandomState(0)
+        n_samples, n_features = 5, 5
+        X = rng.randn(n_samples, n_features)
+        y = rng.randn(n_samples)
+
+A similar snippet can be used as synthetic data when testing scaling tools such
+as :class:`sklearn.preprocessing.StandardScaler`.
+
+- classification
+
+    If the bug is not raised when encoding a categorical variable, you can
+    feed numeric data to a classifier. Just remember to ensure that the target
+    is indeed an integer.
+
+    .. code-block:: python
+
+        import numpy as np
+
+        rng = np.random.RandomState(0)
+        n_samples, n_features = 5, 5
+        X = rng.randn(n_samples, n_features)
+        y = rng.randint(0, 2, n_samples)  # binary target with values in {0, 1}
+
+
+    If the bug only happens with non-numeric class labels, you might want to
+    generate a random target with `numpy.random.choice
+    <https://numpy.org/doc/stable/reference/random/generated/numpy.random.choice.html>`_.
+
+    .. code-block:: python
+
+        import numpy as np
+
+        rng = np.random.RandomState(0)
+        n_samples, n_features = 50, 5
+        X = rng.randn(n_samples, n_features)
+        y = np.random.choice(
+            ["male", "female", "other"], size=n_samples, p=[0.49, 0.49, 0.02]
+        )
+
+Pandas
+------
+
+Some scikit-learn objects expect pandas dataframes as input. In this case you can
+transform numpy arrays into pandas objects using `pandas.DataFrame
+<https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_, or
+`pandas.Series
+<https://pandas.pydata.org/docs/reference/api/pandas.Series.html>`_.
+
+.. code-block:: python
+
+    import numpy as np
+    import pandas as pd
+
+    rng = np.random.RandomState(0)
+    n_samples, n_features = 5, 5
+    X = pd.DataFrame(
+        {
+            "continuous_feature": rng.randn(n_samples),
+            "positive_feature": rng.uniform(low=0.0, high=100.0, size=n_samples),
+            "categorical_feature": rng.choice(["a", "b", "c"], size=n_samples),
+        }
+    )
+    y = pd.Series(rng.randn(n_samples))
+
+In addition, scikit-learn includes various :ref:`sample_generators` that can be
+used to build artificial datasets of controlled size and complexity.
+
+`make_regression`
+-----------------
+
+As hinted by the name, :class:`sklearn.datasets.make_regression` produces
+regression targets with noise as an optionally-sparse random linear combination
+of random features.
+
+.. code-block:: python
+
+    from sklearn.datasets import make_regression
+
+    X, y = make_regression(n_samples=1000, n_features=20)
+
+`make_classification`
+---------------------
+
+:class:`sklearn.datasets.make_classification` creates multiclass datasets with multiple Gaussian
+clusters per class. Noise can be introduced by means of correlated, redundant or
+uninformative features.
+
+.. code-block:: python
+
+    from sklearn.datasets import make_classification
+
+    X, y = make_classification(
+        n_features=2, n_redundant=0, n_informative=2, n_clusters_per_class=1
+    )
+
+`make_blobs`
+------------
+
+Similarly to `make_classification`, :class:`sklearn.datasets.make_blobs` creates
+multiclass datasets using normally-distributed clusters of points. It provides
+greater control regarding the centers and standard deviations of each cluster,
+and therefore it is useful to demonstrate clustering.
+
+.. code-block:: python
+
+    from sklearn.datasets import make_blobs
+
+    X, y = make_blobs(n_samples=10, centers=3, n_features=2)
+
+Dataset loading utilities
+-------------------------
+
+You can use the :ref:`datasets` to load and fetch several popular reference
+datasets. This option is useful when the bug relates to the particular structure
+of the data, e.g. dealing with missing values or image recognition.
+
+.. code-block:: python
+
+    from sklearn.datasets import load_breast_cancer
+
+    X, y = load_breast_cancer(return_X_y=True)
diff -pruN 0.23.2-5/doc/developers/performance.rst 1.1.1-1/doc/developers/performance.rst
--- 0.23.2-5/doc/developers/performance.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/developers/performance.rst	2022-05-19 12:16:26.436782000 +0000
@@ -74,8 +74,10 @@ following:
 
 When using Cython, use either
 
-   $ python setup.py build_ext -i
-   $ python setup.py install
+.. prompt:: bash $
+
+  python setup.py build_ext -i
+  python setup.py install
 
 to generate C files. You are responsible for adding .c/.cpp extensions along
 with build parameters in each submodule ``setup.py``.
@@ -179,22 +181,25 @@ It is however still interesting to check
 ``_nls_subproblem`` function which is the hotspot if we only consider
 Python code: it takes around 100% of the accumulated time of the module. In
 order to better understand the profile of this specific function, let
-us install ``line_profiler`` and wire it to IPython::
+us install ``line_profiler`` and wire it to IPython:
+
+.. prompt:: bash $
 
-  $ pip install line_profiler
+  pip install line_profiler
 
-- **Under IPython 0.13+**, first create a configuration profile::
+- **Under IPython 0.13+**, first create a configuration profile:
 
-    $ ipython profile create
+.. prompt:: bash $
 
-  Then register the line_profiler extension in
-  ``~/.ipython/profile_default/ipython_config.py``::
+  ipython profile create
+
+Then register the line_profiler extension in
+``~/.ipython/profile_default/ipython_config.py``::
 
     c.TerminalIPythonApp.extensions.append('line_profiler')
     c.InteractiveShellApp.extensions.append('line_profiler')
 
-  This will register the ``%lprun`` magic command in the IPython terminal
-  application and the other frontends such as qtconsole and notebook.
+This will register the ``%lprun`` magic command in the IPython terminal application and the other frontends such as qtconsole and notebook.
 
 Now restart IPython and let us use this new toy::
 
@@ -252,26 +257,30 @@ Memory usage profiling
 
 You can analyze in detail the memory usage of any Python code with the help of
 `memory_profiler <https://pypi.org/project/memory_profiler/>`_. First,
-install the latest version::
+install the latest version:
+
+.. prompt:: bash $
 
-    $ pip install -U memory_profiler
+  pip install -U memory_profiler
 
 Then, setup the magics in a manner similar to ``line_profiler``.
 
-- **Under IPython 0.11+**, first create a configuration profile::
+- **Under IPython 0.11+**, first create a configuration profile:
 
-    $ ipython profile create
+.. prompt:: bash $
+  
+    ipython profile create
 
-  Then register the extension in
-  ``~/.ipython/profile_default/ipython_config.py``
-  alongside the line profiler::
+
+Then register the extension in
+``~/.ipython/profile_default/ipython_config.py``
+alongside the line profiler::
 
     c.TerminalIPythonApp.extensions.append('memory_profiler')
     c.InteractiveShellApp.extensions.append('memory_profiler')
 
-  This will register the ``%memit`` and ``%mprun`` magic commands in the
-  IPython terminal application and the other frontends such as qtconsole and
-  notebook.
+This will register the ``%memit`` and ``%mprun`` magic commands in the
+IPython terminal application and the other frontends such as qtconsole and notebook.
 
 ``%mprun`` is useful to examine, line-by-line, the memory usage of key
 functions in your program. It is very similar to ``%lprun``, discussed in the
@@ -388,11 +397,15 @@ kcachegrind
 ~~~~~~~~~~~
 
 ``yep`` can be used to create a profiling report.
-``kcachegrind`` provides a graphical environment to visualize this report::
+``kcachegrind`` provides a graphical environment to visualize this report:
+
+.. prompt:: bash $
 
   # Run yep to profile some python script
   python -m yep -c my_file.py
 
+.. prompt:: bash $
+
+  # open my_file.py.prof with kcachegrind
   kcachegrind my_file.py.prof
 
diff -pruN 0.23.2-5/doc/developers/plotting.rst 1.1.1-1/doc/developers/plotting.rst
--- 0.23.2-5/doc/developers/plotting.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/developers/plotting.rst	2022-05-19 12:16:26.436782000 +0000
@@ -18,12 +18,14 @@ stored and the plotting is done in a `pl
 `__init__` method contains only the data needed to create the visualization.
 The `plot` method takes in parameters that only have to do with visualization,
 such as a matplotlib axes. The `plot` method will store the matplotlib artists
-as attributes allowing for style adjustments through the display object. A
-`plot_*` helper function accepts parameters to do the computation and the
-parameters used for plotting. After the helper function creates the display
-object with the computed values, it calls the display's plot method. Note that
-the `plot` method defines attributes related to matplotlib, such as the line
-artist. This allows for customizations after calling the `plot` method.
+as attributes allowing for style adjustments through the display object. The
+`Display` class should define one or both class methods: `from_estimator` and
+`from_predictions`. These methods allows to create the `Display` object from
+the estimator and some data or from the true and predicted values. After these
+class methods create the display object with the computed values, then call the
+display's plot method. Note that the `plot` method defines attributes related
+to matplotlib, such as the line artist. This allows for customizations after
+calling the `plot` method.
 
 For example, the `RocCurveDisplay` defines the following methods and
 attributes::
@@ -36,20 +38,25 @@ attributes::
            self.roc_auc = roc_auc
            self.estimator_name = estimator_name
 
+       @classmethod
+       def from_estimator(cls, estimator, X, y):
+           # get the predictions
+           y_pred = estimator.predict_proba(X)[:, 1]
+           return cls.from_predictions(y, y_pred, estimator.__class__.__name__)
+
+       @classmethod
+       def from_predictions(cls, y, y_pred, estimator_name):
+           # do ROC computation from y and y_pred
+           fpr, tpr, roc_auc = ...
+           viz = RocCurveDisplay(fpr, tpr, roc_auc, estimator_name)
+           return viz.plot()
+
        def plot(self, ax=None, name=None, **kwargs):
            ...
            self.line_ = ...
            self.ax_ = ax
            self.figure_ = ax.figure_
 
-   def plot_roc_curve(estimator, X, y, pos_label=None, sample_weight=None,
-                      drop_intermediate=True, response_method="auto",
-                      name=None, ax=None, **kwargs):
-       # do computation
-       viz = RocCurveDisplay(fpr, tpr, roc_auc, 
-                                estimator.__class__.__name__)
-       return viz.plot(ax=ax, name=name, **kwargs)
-
 Read more in :ref:`sphx_glr_auto_examples_miscellaneous_plot_roc_curve_visualization_api.py`
 and the :ref:`User Guide <visualizations>`.
 
@@ -57,8 +64,8 @@ Plotting with Multiple Axes
 ---------------------------
 
 Some of the plotting tools like
-:func:`~sklearn.inspection.plot_partial_dependence` and
-:class:`~sklearn.inspection.PartialDependenceDisplay` support plottong on
+:func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` and
+:class:`~sklearn.inspection.PartialDependenceDisplay` support plotting on
 multiple axes. Two different scenarios are supported:
 
 1. If a list of axes is passed in, `plot` will check if the number of axes is
@@ -80,8 +87,8 @@ be placed. In this case, we suggest usin
 By default, the `ax` keyword in `plot` is `None`. In this case, the single
 axes is created and the gridspec api is used to create the regions to plot in.
 
-See for example, :func:`~sklearn.inspection.plot_partial_dependence` which
-plots multiple lines and contours using this API. The axes defining the
+See for example, :func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator`
+which plots multiple lines and contours using this API. The axes defining the
 bounding box is saved in a `bounding_ax_` attribute. The individual axes
 created are stored in an `axes_` ndarray, corresponding to the axes position on
 the grid. Positions that are not used are set to `None`. Furthermore, the
diff -pruN 0.23.2-5/doc/developers/tips.rst 1.1.1-1/doc/developers/tips.rst
--- 0.23.2-5/doc/developers/tips.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/developers/tips.rst	2022-05-19 12:16:26.436782000 +0000
@@ -55,13 +55,17 @@ Useful pytest aliases and flags
 The full test suite takes fairly long to run. For faster iterations,
 it is possibly to select a subset of tests using pytest selectors.
 In particular, one can run a `single test based on its node ID
-<https://docs.pytest.org/en/latest/example/markers.html#selecting-tests-based-on-their-node-id>`_::
+<https://docs.pytest.org/en/latest/example/markers.html#selecting-tests-based-on-their-node-id>`_:
+
+.. prompt:: bash $
 
   pytest -v sklearn/linear_model/tests/test_logistic.py::test_sparsify
 
 or use the `-k pytest parameter
 <https://docs.pytest.org/en/latest/example/markers.html#using-k-expr-to-select-tests-based-on-their-name>`_
-to select tests based on their name. For instance,::
+to select tests based on their name. For instance,:
+
+.. prompt:: bash $
 
   pytest sklearn/tests/test_common.py -v -k LogisticRegression
 
@@ -74,9 +78,11 @@ When a unit test fails, the following tr
 
   2. The argument ``pytest --pdb`` drops into the Python debugger on failure. To
      instead drop into the rich IPython debugger ``ipdb``, you may set up a
-     shell alias to::
+     shell alias to:
 
-         pytest --pdbcls=IPython.terminal.debugger:TerminalPdb --capture no
+.. prompt:: bash $
+
+    pytest --pdbcls=IPython.terminal.debugger:TerminalPdb --capture no
 
 Other `pytest` options that may become useful include:
 
@@ -110,7 +116,10 @@ replies <https://github.com/settings/rep
 Issue: Usage questions
     ::
 
-        You're asking a usage question. The issue tracker is mainly for bugs and new features. For usage questions, it is recommended to try [Stack Overflow](https://stackoverflow.com/questions/tagged/scikit-learn) or [the Mailing List](https://mail.python.org/mailman/listinfo/scikit-learn).
+        You are asking a usage question. The issue tracker is for bugs and new features. For usage questions, it is recommended to try [Stack Overflow](https://stackoverflow.com/questions/tagged/scikit-learn) or [the Mailing List](https://mail.python.org/mailman/listinfo/scikit-learn).
+
+        Unfortunately, we need to close this issue as this issue tracker is a communication tool used for the development of scikit-learn. The additional activity created by usage questions crowds it too much and impedes this development. The conversation can continue here, however there is no guarantee that it will receive attention from core developers.
+
 
 Issue: You're welcome to update the docs
     ::
@@ -181,7 +190,7 @@ PR-NEW: Fix #
 PR-NEW or Issue: Maintenance cost
     ::
 
-        Every feature we include has a [maintenance cost](http://scikit-learn.org/dev/faq.html#why-are-you-so-selective-on-what-algorithms-you-include-in-scikit-learn). Our maintainers are mostly volunteers. For a new feature to be included, we need evidence that it is often useful and, ideally, [well-established](http://scikit-learn.org/dev/faq.html#what-are-the-inclusion-criteria-for-new-algorithms) in the literature or in practice. That doesn't stop you implementing it for yourself and publishing it in a separate repository, or even [scikit-learn-contrib](https://scikit-learn-contrib.github.io).
+        Every feature we include has a [maintenance cost](http://scikit-learn.org/dev/faq.html#why-are-you-so-selective-on-what-algorithms-you-include-in-scikit-learn). Our maintainers are mostly volunteers. For a new feature to be included, we need evidence that it is often useful and, ideally, [well-established](http://scikit-learn.org/dev/faq.html#what-are-the-inclusion-criteria-for-new-algorithms) in the literature or in practice. Also, we expect PR authors to take part in the maintenance for the code they submit, at least initially. That doesn't stop you implementing it for yourself and publishing it in a separate repository, or even [scikit-learn-contrib](https://scikit-learn-contrib.github.io).
 
 PR-WIP: What's needed before merge?
     ::
@@ -191,7 +200,7 @@ PR-WIP: What's needed before merge?
 PR-WIP: Regression test needed
     ::
 
-        Please add a [non-regression test](https://en.wikipedia.org/wiki/Non-regression_testing) that would fail at master but pass in this PR.
+        Please add a [non-regression test](https://en.wikipedia.org/wiki/Non-regression_testing) that would fail at main but pass in this PR.
 
 PR-WIP: PEP8
     ::
@@ -220,7 +229,7 @@ Debugging memory errors in Cython with v
 
 While python/numpy's built-in memory management is relatively robust, it can
 lead to performance penalties for some routines. For this reason, much of
-the high-performance code in scikit-learn in written in cython. This
+the high-performance code in scikit-learn is written in cython. This
 performance gain comes with a tradeoff, however: it is very easy for memory
 bugs to crop up in cython code, especially in situations where that code
 relies heavily on pointer arithmetic.
@@ -243,9 +252,11 @@ code. Follow these steps:
      python suppressions. If you don't, you will have spurious output coming
      related to the python interpreter instead of your own code.
 
-  4. Run valgrind as follows::
+  4. Run valgrind as follows:
+
+.. prompt:: bash $
 
-       $> valgrind -v --suppressions=valgrind-python.supp python my_test_script.py
+  valgrind -v --suppressions=valgrind-python.supp python my_test_script.py
 
 .. _valgrind: http://valgrind.org
 .. _`README.valgrind`: https://github.com/python/cpython/blob/master/Misc/README.valgrind
@@ -260,3 +271,67 @@ give you clues as to the source of your
 
 For more information on valgrind and the array of options it has, see the
 tutorials and documentation on the `valgrind web site <http://valgrind.org>`_.
+
+.. _arm64_dev_env:
+
+Building and testing for the ARM64 platform on a x86_64 machine
+===============================================================
+
+ARM-based machines are a popular target for mobile, edge or other low-energy
+deployments (including in the cloud, for instance on Scaleway or AWS Graviton).
+
+Here are instructions to setup a local dev environment to reproduce
+ARM-specific bugs or test failures on a x86_64 host laptop or workstation. This
+is based on QEMU user mode emulation using docker for convenience (see
+https://github.com/multiarch/qemu-user-static).
+
+.. note::
+
+    The following instructions are illustrated for ARM64 but they also apply to
+    ppc64le, after changing the Docker image and Miniforge paths appropriately.
+
+Prepare a folder on the host filesystem and download the necessary tools and
+source code:
+
+.. prompt:: bash $
+
+    mkdir arm64
+    pushd arm64
+    wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh
+    git clone https://github.com/scikit-learn/scikit-learn.git
+
+Use docker to install QEMU user mode and run an ARM64v8 container with access
+to your shared folder under the `/io` mount point:
+
+.. prompt:: bash $
+
+    docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
+    docker run -v`pwd`:/io --rm -it arm64v8/ubuntu /bin/bash
+
+In the container, install miniforge3 for the ARM64 (a.k.a. aarch64)
+architecture:
+
+.. prompt:: bash $
+
+    bash Miniforge3-Linux-aarch64.sh
+    # Choose to install miniforge3 under: `/io/miniforge3`
+
+Whenever you restart a new container, you will need to reinit the conda env
+previously installed under `/io/miniforge3`:
+
+.. prompt:: bash $
+
+    /io/miniforge3/bin/conda init
+    source /root/.bashrc
+
+as the `/root` home folder is part of the ephemeral docker container. Every
+file or directory stored under `/io` is persistent on the other hand.
+
+You can then build scikit-learn as usual (you will need to install compiler
+tools and dependencies using apt or conda as usual). Building scikit-learn
+takes a lot of time because of the emulation layer, however it needs to be
+done only once if you put the scikit-learn folder under the `/io` mount
+point.
+
+Then use pytest to run only the tests of the module you are interested in
+debugging.
diff -pruN 0.23.2-5/doc/developers/utilities.rst 1.1.1-1/doc/developers/utilities.rst
--- 0.23.2-5/doc/developers/utilities.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/developers/utilities.rst	2022-05-19 12:16:26.436782000 +0000
@@ -98,7 +98,7 @@ Efficient Linear Algebra & Array Operati
   number of components.
 
 - :func:`arrayfuncs.cholesky_delete`:
-  (used in :func:`sklearn.linear_model.lars_path`)  Remove an
+  (used in :func:`~sklearn.linear_model.lars_path`)  Remove an
   item from a cholesky factorization.
 
 - :func:`arrayfuncs.min_pos`: (used in ``sklearn.linear_model.least_angle``)
@@ -121,7 +121,7 @@ Efficient Linear Algebra & Array Operati
   used in :func:`shuffle`, below.
 
 - :func:`shuffle`: Shuffle arrays or sparse matrices in a consistent way.
-  Used in :func:`sklearn.cluster.k_means`.
+  Used in :func:`~sklearn.cluster.k_means`.
 
 
 Efficient Random Sampling
@@ -141,17 +141,17 @@ efficiently process ``scipy.sparse`` dat
 - :func:`sparsefuncs.mean_variance_axis`: compute the means and
   variances along a specified axis of a CSR matrix.
   Used for normalizing the tolerance stopping criterion in
-  :class:`sklearn.cluster.KMeans`.
+  :class:`~sklearn.cluster.KMeans`.
 
 - :func:`sparsefuncs_fast.inplace_csr_row_normalize_l1` and
   :func:`sparsefuncs_fast.inplace_csr_row_normalize_l2`: can be used to normalize
   individual sparse samples to unit L1 or L2 norm as done in
-  :class:`sklearn.preprocessing.Normalizer`.
+  :class:`~sklearn.preprocessing.Normalizer`.
 
 - :func:`sparsefuncs.inplace_csr_column_scale`: can be used to multiply the
   columns of a CSR matrix by a constant scale (one scale per column).
   Used for scaling features to unit standard deviation in
-  :class:`sklearn.preprocessing.StandardScaler`.
+  :class:`~sklearn.preprocessing.StandardScaler`.
 
 
 Graph Routines
@@ -165,13 +165,6 @@ Graph Routines
   If this is ever needed again, it would be far faster to use a single
   iteration of Dijkstra's algorithm from ``graph_shortest_path``.
 
-- :func:`graph_shortest_path.graph_shortest_path`:
-  (used in :class:`sklearn.manifold.Isomap`)
-  Return the shortest path between all pairs of connected points on a directed
-  or undirected graph.  Both the Floyd-Warshall algorithm and Dijkstra's
-  algorithm are available.  The algorithm is most efficient when the
-  connectivity matrix is a ``scipy.sparse.csr_matrix``.
-
 
 Testing Functions
 =================
@@ -193,8 +186,11 @@ Helper Functions
 ================
 
 - :class:`gen_even_slices`: generator to create ``n``-packs of slices going up
-  to ``n``.  Used in :func:`sklearn.decomposition.dict_learning` and
-  :func:`sklearn.cluster.k_means`.
+  to ``n``.  Used in :func:`~sklearn.decomposition.dict_learning` and
+  :func:`~sklearn.cluster.k_means`.
+
+- :class:`gen_batches`: generator to create slices containing batch size elements
+  from 0 to ``n``.
 
 - :func:`safe_mask`: Helper function to convert a mask to the format expected
   by the numpy array or scipy sparse matrix on which to use it (sparse
@@ -231,5 +227,5 @@ Warnings and Exceptions
 
 - :class:`deprecated`: Decorator to mark a function or class as deprecated.
 
-- :class:`sklearn.exceptions.ConvergenceWarning`: Custom warning to catch
+- :class:`~sklearn.exceptions.ConvergenceWarning`: Custom warning to catch
   convergence problems. Used in ``sklearn.covariance.graphical_lasso``.
diff -pruN 0.23.2-5/doc/faq.rst 1.1.1-1/doc/faq.rst
--- 0.23.2-5/doc/faq.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/faq.rst	2022-05-19 12:16:26.436782000 +0000
@@ -20,7 +20,6 @@ sy-kit learn. sci stands for science!
 Why scikit?
 ------------
 There are multiple scikits, which are scientific toolboxes built around SciPy.
-You can find a list at `<https://scikits.appspot.com/scikits>`_.
 Apart from scikit-learn, another popular one is `scikit-image <https://scikit-image.org/>`_.
 
 How can I contribute to scikit-learn?
@@ -165,7 +164,7 @@ scikit-learn has to offer. If we started
 learning, we'd need to redesign the whole package and the project
 would likely collapse under its own weight.
 
-There are two project with API similar to scikit-learn that
+There are two projects with API similar to scikit-learn that
 do structured prediction:
 
 * `pystruct <https://pystruct.github.io/>`_ handles general structured
@@ -191,10 +190,13 @@ careful choice of algorithms.
 Do you support PyPy?
 --------------------
 
-In case you didn't know, `PyPy <https://pypy.org/>`_ is an alternative
-Python implementation with a built-in just-in-time compiler. Experimental
-support for PyPy3-v5.10+ has been added, which requires Numpy 1.14.0+,
-and scipy 1.1.0+.
+scikit-learn is regularly tested and maintained to work with
+`PyPy <https://pypy.org/>`_ (an alternative Python implementation with
+a built-in just-in-time compiler).
+
+Note however that this support is still considered experimental and specific
+components might behave slightly differently. Please refer to the test
+suite of the specific module of interest for more details.
 
 How do I deal with string data (or trees, graphs...)?
 -----------------------------------------------------
@@ -261,7 +263,7 @@ state in the child process is corrupted:
 threads while only the main thread state has been forked. It is possible to
 change the libraries to make them detect when a fork happens and reinitialize
 the thread pool in that case: we did that for OpenBLAS (merged upstream in
-master since 0.2.10) and we contributed a `patch
+main since 0.2.10) and we contributed a `patch
 <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035>`_ to GCC's OpenMP runtime
 (not yet reviewed).
 
@@ -327,7 +329,7 @@ You can find more information about addi
 `Will you add GPU support?`_.
 
 Note that scikit-learn currently implements a simple multilayer perceptron
-in `sklearn.neural_network`. We will only accept bug fixes for this module.
+in :mod:`sklearn.neural_network`. We will only accept bug fixes for this module.
 If you want to implement more complex deep learning models, please turn to
 popular deep learning frameworks such as
 `tensorflow <https://www.tensorflow.org/>`_,
@@ -355,22 +357,7 @@ this reason.
 How do I set a ``random_state`` for an entire execution?
 ---------------------------------------------------------
 
-For testing and replicability, it is often important to have the entire execution
-controlled by a single seed for the pseudo-random number generator used in
-algorithms that have a randomized component. Scikit-learn does not use its own
-global random state; whenever a RandomState instance or an integer random seed
-is not provided as an argument, it relies on the numpy global random state,
-which can be set using :func:`numpy.random.seed`.
-For example, to set an execution's numpy global random state to 42, one could
-execute the following in his or her script::
-
-    import numpy as np
-    np.random.seed(42)
-
-However, a global random state is prone to modification by other code during
-execution. Thus, the only way to ensure replicability is to pass ``RandomState``
-instances everywhere and ensure that both estimators and cross-validation
-splitters have their ``random_state`` parameter set.
+Please refer to :ref:`randomness`.
 
 Why do categorical variables need preprocessing in scikit-learn, compared to other tools?
 -----------------------------------------------------------------------------------------
@@ -394,20 +381,61 @@ data structures.
 
 Do you plan to implement transform for target y in a pipeline?
 ----------------------------------------------------------------------------
-Currently transform only works for features X in a pipeline. 
-There's a long-standing discussion about 
+Currently transform only works for features X in a pipeline.
+There's a long-standing discussion about
 not being able to transform y in a pipeline.
 Follow on github issue
 `#4143 <https://github.com/scikit-learn/scikit-learn/issues/4143>`_.
 Meanwhile check out
-:class:`sklearn.compose.TransformedTargetRegressor`,
+:class:`~compose.TransformedTargetRegressor`,
 `pipegraph <https://github.com/mcasl/PipeGraph>`_,
 `imbalanced-learn <https://github.com/scikit-learn-contrib/imbalanced-learn>`_.
-Note that Scikit-learn solved for the case where y 
-has an invertible transformation applied before training 
+Note that Scikit-learn solved for the case where y
+has an invertible transformation applied before training
 and inverted after prediction. Scikit-learn intends to solve for
-use cases where y should be transformed at training time 
-and not at test time, for resampling and similar uses, 
-like at imbalanced learn. 
-In general, these use cases can be solved 
+use cases where y should be transformed at training time
+and not at test time, for resampling and similar uses,
+as in `imbalanced-learn`.
+In general, these use cases can be solved
+with a custom meta estimator rather than a Pipeline.
+
+Why are there so many different estimators for linear models?
+-------------------------------------------------------------
+Usually, there is one classifier and one regressor per model type, e.g.
+:class:`~ensemble.GradientBoostingClassifier` and
+:class:`~ensemble.GradientBoostingRegressor`. Both have similar options and
+both have the parameter `loss`, which is especially useful in the regression
+case as it enables the estimation of conditional mean as well as conditional
+quantiles.
+
+For linear models, there are many estimator classes which are very close to
+each other. Let us have a look at
+
+- :class:`~linear_model.LinearRegression`, no penalty
+- :class:`~linear_model.Ridge`, L2 penalty
+- :class:`~linear_model.Lasso`, L1 penalty (sparse models)
+- :class:`~linear_model.ElasticNet`, L1 + L2 penalty (less sparse models)
+- :class:`~linear_model.SGDRegressor` with `loss='squared_loss'`
+
+**Maintainer perspective:**
+They all do in principle the same and are different only by the penalty they
+impose. This, however, has a large impact on the way the underlying
+optimization problem is solved. In the end, this amounts to usage of different
+methods and tricks from linear algebra. A special case is `SGDRegressor` which
+comprises all 4 previous models and is different by the optimization procedure.
+A further side effect is that the different estimators favor different data
+layouts (`X` c-contiguous or f-contiguous, sparse csr or csc). This complexity
+of the seemingly simple linear models is the reason for having different
+estimator classes for different penalties.
+
+**User perspective:**
+First, the current design is inspired by the scientific literature where linear
+regression models with different regularization/penalty were given different
+names, e.g. *ridge regression*. Having different model classes with according
+names makes it easier for users to find those regression models.
+Secondly, if all the 5 above mentioned linear models were unified into a single
+class, there would be parameters with a lot of options like the ``solver``
+parameter. On top of that, there would be a lot of exclusive interactions
+between different parameters. For example, the possible options of the
+parameters ``solver``, ``precompute`` and ``selection`` would depend on the
+chosen values of the penalty parameters ``alpha`` and ``l1_ratio``.
diff -pruN 0.23.2-5/doc/getting_started.rst 1.1.1-1/doc/getting_started.rst
--- 0.23.2-5/doc/getting_started.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/getting_started.rst	2022-05-19 12:16:26.436782000 +0000
@@ -9,8 +9,8 @@ etc.). Please refer to our :ref:`install
 
 ``Scikit-learn`` is an open source machine learning library that supports
 supervised and unsupervised learning. It also provides various tools for
-model fitting, data preprocessing, model selection and evaluation, and many
-other utilities.
+model fitting, data preprocessing, model selection, model evaluation,
+and many other utilities.
 
 Fitting and predicting: estimator basics
 ----------------------------------------
@@ -69,6 +69,7 @@ newly transformed sample matrix ``X``::
   >>> from sklearn.preprocessing import StandardScaler
   >>> X = [[0, 15],
   ...      [1, -10]]
+  >>> # scale data according to computed scaling values
   >>> StandardScaler().fit(X).transform(X)
   array([[-1.,  1.],
          [ 1., -1.]])
@@ -101,7 +102,7 @@ the test data::
   >>> # create a pipeline object
   >>> pipe = make_pipeline(
   ...     StandardScaler(),
-  ...     LogisticRegression(random_state=0)
+  ...     LogisticRegression()
   ... )
   ...
   >>> # load the iris dataset and split it into train and test sets
@@ -111,7 +112,7 @@ the test data::
   >>> # fit the whole pipeline
   >>> pipe.fit(X_train, y_train)
   Pipeline(steps=[('standardscaler', StandardScaler()),
-                  ('logisticregression', LogisticRegression(random_state=0))])
+                  ('logisticregression', LogisticRegression())])
   >>> # we can now use it like any other estimator
   >>> accuracy_score(pipe.predict(X_test), y_test)
   0.97...
diff -pruN 0.23.2-5/doc/glossary.rst 1.1.1-1/doc/glossary.rst
--- 0.23.2-5/doc/glossary.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/glossary.rst	2022-05-19 12:16:26.436782000 +0000
@@ -171,7 +171,7 @@ General Concepts
         one-hot encode categorical features.
         See also :ref:`preprocessing_categorical_features` and the
         `categorical-encoding
-        <https://contrib.scikit-learn.org/categorical-encoding>`_
+        <https://github.com/scikit-learn-contrib/category_encoders>`_
         package for tools related to encoding categorical features.
 
     clone
@@ -186,6 +186,13 @@ General Concepts
         :class:`~pipeline.Pipeline` and
         :class:`~pipeline.FeatureUnion`.)
 
+        If the estimator's `random_state` parameter is an integer (or if the
+        estimator doesn't have a `random_state` parameter), an *exact clone*
+        is returned: the clone and the original estimator will give the exact
+        same results. Otherwise, a *statistical clone* is returned: the clone
+        might yield different results from the original estimator. More
+        details can be found in :ref:`randomness`.
+
     common tests
         This refers to the tests run on almost every estimator class in
         Scikit-learn to check they comply with basic API conventions.  They are
@@ -200,7 +207,7 @@ General Concepts
 
     deprecation
         We use deprecation to slowly violate our :term:`backwards
-        compatibility` assurances, usually to to:
+        compatibility` assurances, usually to:
 
         * change the default value of a parameter; or
         * remove a parameter, attribute, method, class, etc.
@@ -248,6 +255,13 @@ General Concepts
         or vectorizing.  Our estimators do not work with struct arrays, for
         instance.
 
+        Our documentation can sometimes give information about the dtype
+        precision, e.g. `np.int32`, `np.int64`, etc. When the precision is
+        provided, it refers to the NumPy dtype. If an arbitrary precision is
+        used, the documentation will refer to dtype `integer` or `floating`.
+        Note that in this case, the precision can be platform dependent.
+        The `numeric` dtype refers to accepting both `integer` and `floating`.
+
         TODO: Mention efficiency and precision issues; casting policy.
 
     duck typing
@@ -321,6 +335,12 @@ General Concepts
         * sometimes in the :ref:`User Guide <user_guide>` (built from ``doc/``)
           alongside a technical description of the estimator.
 
+    experimental
+        An experimental tool is already usable but its public API, such as
+        default parameter values or fitted attributes, is still subject to
+        change in future versions without the usual :term:`deprecation`
+        warning policy.
+
     evaluation metric
     evaluation metrics
         Evaluation metrics give a measure of how well a model performs.  We may
@@ -363,17 +383,6 @@ General Concepts
                 adopted on a :term:`meta-estimator`.  Its value should usually be
                 checked by way of a helper such as :func:`base.is_classifier`.
 
-            ``_pairwise``
-                This boolean attribute indicates whether the data (``X``) passed to
-                :func:`fit` and similar methods consists of pairwise measures over
-                samples rather than a feature representation for each sample.  It
-                is usually ``True`` where an estimator has a ``metric`` or
-                ``affinity`` or ``kernel`` parameter with value 'precomputed'.
-                Its primary purpose is that when a :term:`meta-estimator`
-                extracts a sub-sample of data intended for a pairwise estimator,
-                the data needs to be indexed on both axes, while other data is
-                indexed only on the first axis.
-
         For more detailed info, see :ref:`estimator_tags`.
 
     feature
@@ -504,8 +513,9 @@ General Concepts
         representation of missing values in float arrays.  If the array has
         integer dtype, NaN cannot be represented. For this reason, we support
         specifying another ``missing_values`` value when :term:`imputation` or
-        learning can be performed in integer space.  :term:`Unlabeled data`
-        is a special case of missing values in the :term:`target`.
+        learning can be performed in integer space.
+        :term:`Unlabeled data <unlabeled data>` is a special case of missing
+        values in the :term:`target`.
 
     ``n_features``
         The number of :term:`features`.
@@ -617,9 +627,8 @@ General Concepts
 
         Note that for most distance metrics, we rely on implementations from
         :mod:`scipy.spatial.distance`, but may reimplement for efficiency in
-        our context.  The :mod:`neighbors` module also duplicates some metric
-        implementations for integration with efficient binary tree search data
-        structures.
+        our context. The :class:`metrics.DistanceMetric` interface is used to implement
+        distance metrics for integration with efficient neighbors search.
 
     pd
         A shorthand for `Pandas <https://pandas.pydata.org>`_ due to the
@@ -638,9 +647,9 @@ General Concepts
         sample and each column to a training sample.
 
         Use of precomputed X is usually indicated by setting a ``metric``,
-        ``affinity`` or ``kernel`` parameter to the string 'precomputed'.  An
-        estimator should mark itself as being :term:`_pairwise` if this is the
-        case.
+        ``affinity`` or ``kernel`` parameter to the string 'precomputed'. If
+        this is the case, then the estimator should set the `pairwise`
+        estimator tag as True.
 
     rectangular
         Data that can be represented as a matrix with :term:`samples` on the
@@ -867,6 +876,7 @@ Class APIs and Estimator Types
         * :term:`fit`
         * :term:`transform`
         * :term:`get_feature_names`
+        * :term:`get_feature_names_out`
 
     meta-estimator
     meta-estimators
@@ -896,7 +906,7 @@ Class APIs and Estimator Types
         possible to identify which methods are provided by the underlying
         estimator until the meta-estimator has been :term:`fitted` (see also
         :term:`duck typing`), for which
-        :func:`utils.metaestimators.if_delegate_has_method` may help.  It
+        :func:`utils.metaestimators.available_if` may help.  It
         should also provide (or modify) the :term:`estimator tags` and
         :term:`classes_` attribute provided by the base estimator.
 
@@ -980,12 +990,15 @@ such as:
         Cross-validation estimators are named `EstimatorCV` and tend to be
         roughly equivalent to `GridSearchCV(Estimator(), ...)`. The
         advantage of using a cross-validation estimator over the canonical
-        :term:`Estimator` class along with :ref:`grid search <grid_search>` is
+        :term:`estimator` class along with :ref:`grid search <grid_search>` is
         that they can take advantage of warm-starting by reusing precomputed
         results in the previous steps of the cross-validation process. This
         generally leads to speed improvements. An exception is the
         :class:`RidgeCV <linear_model.RidgeCV>` class, which can instead
-        perform efficient Leave-One-Out CV.
+        perform efficient Leave-One-Out (LOO) CV. By default, all these
+        estimators, apart from :class:`RidgeCV <linear_model.RidgeCV>` with an
+        LOO-CV, will be refitted on the full training dataset after finding the
+        best combination of hyper-parameters.
 
     scorer
         A non-estimator callable object which evaluates an estimator on given
@@ -995,7 +1008,7 @@ such as:
 
 Further examples:
 
-* :class:`neighbors.DistanceMetric`
+* :class:`metrics.DistanceMetric`
 * :class:`gaussian_process.kernels.Kernel`
 * ``tree.Criterion``
 
@@ -1036,7 +1049,9 @@ Target Types
         identified as 'multiclass'.
 
     continuous multioutput
+    continuous multi-output
     multioutput continuous
+    multi-output continuous
         A regression problem where each sample's target consists of ``n_outputs``
         :term:`outputs`, each one a finite floating point number, for a
         fixed int ``n_outputs > 1`` in a particular dataset.
@@ -1051,6 +1066,7 @@ Target Types
         'multiclass-multioutput'.
 
     multiclass
+    multi-class
         A classification problem consisting of more than two classes.  A
         multiclass target may be represented as a 1-dimensional array of
         strings or integers.  A 2d column vector of integers (i.e. a
@@ -1063,7 +1079,7 @@ Target Types
         For semi-supervised classification, :term:`unlabeled` samples should
         have the special label -1 in ``y``.
 
-        Within sckit-learn, all estimators supporting binary classification
+        Within scikit-learn, all estimators supporting binary classification
         also support multiclass classification, using One-vs-Rest by default.
 
         A :class:`preprocessing.LabelEncoder` helps to canonicalize multiclass
@@ -1074,7 +1090,9 @@ Target Types
         identically to 'multiclass'.
 
     multiclass multioutput
+    multi-class multi-output
     multioutput multiclass
+    multi-output multi-class
         A classification problem where each sample's target consists of
         ``n_outputs`` :term:`outputs`, each a class label, for a fixed int
         ``n_outputs > 1`` in a particular dataset.  Each output has a
@@ -1100,6 +1118,7 @@ Target Types
         'multiclass-multioutput' for multiclass multioutput input.
 
     multilabel
+    multi-label
         A :term:`multiclass multioutput` target where each output is
         :term:`binary`.  This may be represented as a 2d (dense) array or
         sparse matrix of integers, such that each column is a separate binary
@@ -1206,8 +1225,8 @@ Methods
         return the same value, wherein training data needs to be handled
         differently (due to model blending in stacked ensembles, for instance;
         such cases should be clearly documented).
-        :term:`Transductive` transformers may also provide ``fit_transform``
-        but not :term:`transform`.
+        :term:`Transductive <transductive>` transformers may also provide
+        ``fit_transform`` but not :term:`transform`.
 
         One reason to implement ``fit_transform`` is that performing ``fit``
         and ``transform`` separately would be less efficient than together.
@@ -1229,6 +1248,17 @@ Methods
         to the names of input columns from which output column names can
         be generated.  By default input features are named x0, x1, ....
 
+    ``get_feature_names_out``
+        Primarily for :term:`feature extractors`, but also used for other
+        transformers to provide string names for each column in the output of
+        the estimator's :term:`transform` method.  It outputs an array of
+        strings and may take an array-like of strings as input, corresponding
+        to the names of input columns from which output column names can
+        be generated.  If `input_features` is not passed in, then the
+        `feature_names_in_` attribute will be used. If the
+        `feature_names_in_` attribute is not defined, then the
+        input names are named `[x0, x1, ..., x(n_features_in_ - 1)]`.
+
     ``get_n_splits``
         On a :term:`CV splitter` (not an estimator), returns the number of
         elements one would get if iterating through the return value of
@@ -1295,9 +1325,10 @@ Methods
 
         classifier
             An array of shape ``(n_samples,)`` ``(n_samples, n_outputs)``.
-            :term:`Multilabel` data may be represented as a sparse matrix if
-            a sparse matrix was used in fitting. Each element should be one
-            of the values in the classifier's :term:`classes_` attribute.
+            :term:`Multilabel <multilabel>` data may be represented as a sparse
+            matrix if a sparse matrix was used in fitting. Each element should
+            be one of the values in the classifier's :term:`classes_`
+            attribute.
 
         clusterer
             An array of shape ``(n_samples,)`` where each value is from 0 to
@@ -1524,6 +1555,9 @@ functions or non-estimator constructors.
         generally be interpreted as ``n_jobs=1``, unless the current
         :class:`joblib.Parallel` backend context specifies otherwise.
 
+        Note that even if ``n_jobs=1``, low-level parallelism (via Numpy and OpenMP)
+        might be used in some configurations.
+
         For more details on the use of ``joblib`` and its interactions with
         scikit-learn, please refer to our :ref:`parallelism notes
         <parallelism>`.
@@ -1559,6 +1593,7 @@ functions or non-estimator constructors.
             number of different distinct random seeds. Popular integer
             random seeds are 0 and `42
             <https://en.wikipedia.org/wiki/Answer_to_the_Ultimate_Question_of_Life%2C_the_Universe%2C_and_Everything>`_.
+            Integer values must be in the range `[0, 2**32 - 1]`.
 
         A :class:`numpy.random.RandomState` instance
             Use the provided random state, only affecting other users
@@ -1570,6 +1605,9 @@ functions or non-estimator constructors.
         input ``random_state`` and return a :class:`~numpy.random.RandomState`
         instance.
 
+        For more details on how to control the randomness of scikit-learn
+        objects and avoid common pitfalls, you may refer to :ref:`randomness`.
+
     ``scoring``
         Specifies the score function to be maximized (usually by :ref:`cross
         validation <cross_validation>`), or -- in some cases -- multiple score
@@ -1581,10 +1619,10 @@ functions or non-estimator constructors.
         in the User Guide.
 
         Where multiple metrics can be evaluated, ``scoring`` may be given
-        either as a list of unique strings or a dictionary with names as keys
-        and callables as values. Note that this does *not* specify which score
-        function is to be maximized, and another parameter such as ``refit``
-        maybe used for this purpose.
+        either as a list of unique strings, a dictionary with names as keys and
+        callables as values or a callable that returns a dictionary. Note that
+        this does *not* specify which score function is to be maximized, and
+        another parameter such as ``refit`` may be used for this purpose.
 
 
         The ``scoring`` parameter is validated and interpreted using
@@ -1658,7 +1696,7 @@ See concept :term:`attribute`.
         predictors.
 
     ``coef_``
-        The weight/coefficient matrix of a generalised linear model
+        The weight/coefficient matrix of a generalized linear model
         :term:`predictor`, of shape ``(n_features,)`` for binary classification
         and single-output regression, ``(n_classes, n_features)`` for
         multiclass classification and ``(n_targets, n_features)`` for
diff -pruN 0.23.2-5/doc/governance.rst 1.1.1-1/doc/governance.rst
--- 0.23.2-5/doc/governance.rst	2020-08-04 12:12:58.860675300 +0000
+++ 1.1.1-1/doc/governance.rst	2022-05-19 12:16:26.436782000 +0000
@@ -22,12 +22,51 @@ Roles And Responsibilities
 
 Contributors
 ------------
+
 Contributors are community members who contribute in concrete ways to the
 project. Anyone can become a contributor, and contributions can take many forms
 – not only code – as detailed in the :ref:`contributors guide <contributing>`.
 
+Contributor Experience Team
+---------------------------
+
+The contributor experience team is composed of community members who have permission on
+github to label and close issues. :ref:`Their work <bug_triaging>` is
+crucial to improve the communication in the project and limit the crowding
+of the issue tracker.
+
+Similarly to what has been decided in the `python project
+<https://devguide.python.org/triaging/#becoming-a-member-of-the-python-triage-team>`_,
+any contributor may become a member of the scikit-learn contributor experience team,
+after showing some continuity in participating in scikit-learn
+development (with pull requests and reviews).
+Any core developer or member of the contributor experience team is welcome to propose a
+scikit-learn contributor to join the contributor experience team. Other core developers
+are then consulted: while it is expected that most acceptances will be
+unanimous, a two-thirds majority is enough.
+Every new member of the contributor experience team will be announced in the mailing
+list. Members of the team are welcome to participate in `monthly core developer meetings
+<https://github.com/scikit-learn/administrative/tree/master/meeting_notes>`_.
+
+.. _communication_team:
+
+Communication team
+-------------------
+
+Members of the communication team help with outreach and communication
+for scikit-learn. The goal of the team is to develop public awareness of
+scikit-learn, of its features and usage, as well as branding.
+
+For this, they can operate the scikit-learn accounts on various social
+networks and produce materials.
+
+Every new communicator will be announced in the mailing list.
+Communicators are welcome to participate in `monthly core developer meetings
+<https://github.com/scikit-learn/administrative/tree/master/meeting_notes>`_.
+
 Core developers
 ---------------
+
 Core developers are community members who have shown that they are dedicated to
 the continued development of the project through ongoing engagement with the
 community. They have shown they can be trusted to maintain scikit-learn with
@@ -71,10 +110,11 @@ subject to a two-third majority of all c
 approval of all the current TC members. TC members who do not actively engage
 with the TC duties are expected to resign.
 
-The initial Technical Committee of scikit-learn consists of :user:`Alexandre Gramfort <agramfort>`,
-:user:`Olivier Grisel <ogrisel>`, :user:`Andreas Müller <amueller>`, :user:`Joel Nothman <jnothman>`,
-:user:`Hanmin Qin <qinhanmin2014>`, :user:`Gaël Varoquaux <GaelVaroquaux>`, and
-:user:`Roman Yurchak <rth>`.
+The Technical Committee of scikit-learn consists of :user:`Thomas Fan
+<thomasjpfan>`, :user:`Alexandre Gramfort <agramfort>`, :user:`Olivier Grisel
+<ogrisel>`, :user:`Adrin Jalali <adrinjalali>`, :user:`Andreas Müller
+<amueller>`, :user:`Joel Nothman <jnothman>`, :user:`Gaël Varoquaux
+<GaelVaroquaux>` and :user:`Roman Yurchak <rth>`.
 
 Decision Making Process
 =======================
@@ -88,7 +128,7 @@ Scikit-learn uses a "consensus seeking"
 tries to find a resolution that has no open objections among core developers.
 At any point during the discussion, any core-developer can call for a vote, which will
 conclude one month from the call for the vote. Any vote must be backed by a
-`SLEP <slep>`. If no option can gather two thirds of the votes cast, the
+:ref:`SLEP <slep>`. If no option can gather two thirds of the votes cast, the
 decision is escalated to the TC, which in turn will use consensus seeking with
 the fallback option of a simple majority vote if no consensus can be found
 within a month. This is what we hereafter may refer to as “the decision making
Binary files 0.23.2-5/doc/images/anaconda.png and 1.1.1-1/doc/images/anaconda.png differ
Binary files 0.23.2-5/doc/images/columbia.png and 1.1.1-1/doc/images/columbia.png differ
diff -pruN 0.23.2-5/doc/images/czi_logo.svg 1.1.1-1/doc/images/czi_logo.svg
--- 0.23.2-5/doc/images/czi_logo.svg	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/images/czi_logo.svg	2022-05-19 12:16:26.436782000 +0000
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="192px" height="192px" viewBox="0 0 192 192" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <!-- Generator: Sketch 52.2 (67145) - http://www.bohemiancoding.com/sketch -->
+    <title>nav / elements / czi_mark_red</title>
+    <desc>Created with Sketch.</desc>
+    <defs>
+        <polygon id="path-1" points="0 0 192 0 192 192 0 192"></polygon>
+    </defs>
+    <g id="nav-/-elements-/-czi_mark_red" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="czi_mark">
+            <mask id="mask-2" fill="white">
+                <use xlink:href="#path-1"></use>
+            </mask>
+            <g id="Clip-2"></g>
+            <path d="M69.7933712,96.0856792 C56.904554,96.3514262 47.2394287,87.8624926 46.342904,75.3156235 C45.8651731,68.644557 48.110735,62.2697375 52.6627795,57.388862 C57.219641,52.500079 63.427876,49.7657946 70.1438772,49.71016 C73.5426804,49.6796598 77.1369963,50.3684555 80.213062,51.6949308 C80.213062,51.6949308 79.3749077,58.7000872 79.0980732,61.8545962 L89.6903251,61.9153142 L91.5927482,46.1405096 L88.6107553,44.259383 C83.0403449,40.8543771 76.6238464,39.0543018 70.0475376,39.1124781 C60.4838522,39.1960712 51.2757731,43.2215297 44.7856033,50.1809359 C38.201644,57.2442685 34.9578341,66.424257 35.6463786,76.0473453 C36.2558681,84.5890893 39.9417065,92.3790605 46.0178996,97.9919403 C52.1725812,103.677964 60.4583506,106.741255 69.4148134,106.665287 C69.6332775,106.663028 69.8542918,106.657662 70.0753061,106.650319 C75.5060241,106.50855 81.6227365,105.483123 88.354322,102.824806 L96,88.0373038 C96,88.0373038 95.8450066,87.6955889 94.7606198,88.4473617 C88.1840277,93.0068558 80.4898965,95.8651178 69.7933712,96.0856792 Z" id="Fill-1" fill="#FF414B" mask="url(#mask-2)"></path>
+            <path d="M128.264258,140.830158 C127.731065,146.452835 124.81253,151.094434 120.437535,153.404009 C116.963637,155.237918 113.167297,155.227815 109.745876,153.371175 C106.186106,151.433995 104.498127,148.533417 103.864188,144.868125 C102.862906,139.054059 106.168707,132.991356 110.67195,129.934748 L181.049041,84.1510133 C181.585041,88.0250929 181.869318,91.9799935 181.869318,96 C181.869318,143.38388 143.348592,181.932164 95.9998597,181.932164 C48.6516891,181.932164 10.1309628,143.38388 10.1309628,96 C10.1309628,48.616401 48.6516891,10.0655911 95.9998597,10.0655911 C131.406173,10.0655911 161.85659,31.6327505 174.973438,62.3195017 L183.562348,56.7394801 C168.526003,23.330911 134.948264,0 95.9998597,0 C43.0640987,0 0,43.0641617 0,96 C0,148.933313 43.0640987,192 95.9998597,192 C148.933376,192 192,148.933313 192,96 C192,89.8893095 191.418819,83.9121983 190.322123,78.115812 C189.660402,74.3219922 188.211237,69.2931255 187.972422,68.477899 L167.980181,80.9835569 L141.509354,97.7435463 C140.575984,94.2213751 138.445173,90.6540228 133.924531,88.6012237 C128.571266,86.1709789 119.901815,88.0427725 113.539691,91.6603574 C113.539691,91.6603574 130.963622,57.9473061 133.854094,52.4051694 C134.042957,52.0454034 133.77636,51.6202509 133.368607,51.6227765 L132.299413,51.6328792 L100.784853,51.6076226 L99.4768445,62.5030328 L104.405239,62.4856339 L117.132014,62.4856339 L92.1861209,110.251449 C91.7006339,111.182575 92.706967,112.183578 93.6428625,111.700616 L106.95587,104.624001 C113.383661,101.326053 124.083177,94.5586909 129.373582,98.473181 C130.143346,99.0414541 131.129473,100.545905 131.192615,102.123599 C131.220116,102.734528 130.910864,103.318236 130.39984,103.660322 L103.841457,121.605126 C95.2152229,127.771101 91.8415093,136.945976 92.6415806,145.600005 C93.3105985,152.875585 97.8390977,159.144831 104.560988,162.797775 C108.085679,164.71475 111.899418,165.617532 115.708386,165.512016 C119.048986,165.418847 122.385095,164.546092 125.514381,162.893189 
C133.086856,158.890862 138.125818,151.11464 139.001378,142.078114 L141.139766,117.778753 L129.537188,126.397704 L128.264258,140.830158 Z" id="Fill-3" fill="#FF414B" mask="url(#mask-2)"></path>
+        </g>
+    </g>
+</svg>
\ No newline at end of file
Binary files 0.23.2-5/doc/images/digicosme.png and 1.1.1-1/doc/images/digicosme.png differ
Binary files 0.23.2-5/doc/images/grid_search_workflow.png and 1.1.1-1/doc/images/grid_search_workflow.png differ
Binary files 0.23.2-5/doc/images/huggingface_logo-noborder.png and 1.1.1-1/doc/images/huggingface_logo-noborder.png differ
Binary files 0.23.2-5/doc/images/intel.png and 1.1.1-1/doc/images/intel.png differ
Binary files 0.23.2-5/doc/images/intel-small.png and 1.1.1-1/doc/images/intel-small.png differ
Binary files 0.23.2-5/doc/images/logo_APHP.png and 1.1.1-1/doc/images/logo_APHP.png differ
Binary files 0.23.2-5/doc/images/logo_APHP_text.png and 1.1.1-1/doc/images/logo_APHP_text.png differ
Binary files 0.23.2-5/doc/images/multi_org_chart.png and 1.1.1-1/doc/images/multi_org_chart.png differ
Binary files 0.23.2-5/doc/images/quansight-labs.png and 1.1.1-1/doc/images/quansight-labs.png differ
Binary files 0.23.2-5/doc/images/quansight-labs-small.png and 1.1.1-1/doc/images/quansight-labs-small.png differ
Binary files 0.23.2-5/doc/images/sydney-primary.jpeg and 1.1.1-1/doc/images/sydney-primary.jpeg differ
Binary files 0.23.2-5/doc/images/telecom.png and 1.1.1-1/doc/images/telecom.png differ
Binary files 0.23.2-5/doc/images/visual-studio-build-tools-selection.png and 1.1.1-1/doc/images/visual-studio-build-tools-selection.png differ
diff -pruN 0.23.2-5/doc/inspection.rst 1.1.1-1/doc/inspection.rst
--- 0.23.2-5/doc/inspection.rst	2020-08-04 12:12:58.872675400 +0000
+++ 1.1.1-1/doc/inspection.rst	2022-05-19 12:16:26.448782400 +0000
@@ -1,3 +1,7 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
 .. include:: includes/big_toc_css.rst
 
 .. _inspection:
@@ -6,14 +10,14 @@ Inspection
 ----------
 
 Predictive performance is often the main goal of developing machine learning
-models. Yet summarising performance with an evaluation metric is often
+models. Yet summarizing performance with an evaluation metric is often
 insufficient: it assumes that the evaluation metric and test dataset
 perfectly reflect the target domain, which is rarely true. In certain domains,
 a model needs a certain level of interpretability before it can be deployed.
-A model that is exhibiting performance issues needs to be debugged for one to 
-understand the model's underlying issue. The 
-:mod:`sklearn.inspection` module provides tools to help understand the 
-predictions from a model and what affects them. This can be used to 
+A model that is exhibiting performance issues needs to be debugged for one to
+understand the model's underlying issue. The
+:mod:`sklearn.inspection` module provides tools to help understand the
+predictions from a model and what affects them. This can be used to
 evaluate assumptions and biases of a model, design a better model, or
 to diagnose issues with model performance.
 
diff -pruN 0.23.2-5/doc/install.rst 1.1.1-1/doc/install.rst
--- 0.23.2-5/doc/install.rst	2020-08-04 12:12:58.872675400 +0000
+++ 1.1.1-1/doc/install.rst	2022-05-19 12:16:26.448782400 +0000
@@ -59,7 +59,10 @@ Installing the latest release
          <span class="sk-expandable" data-packager="pip" data-os="windows">Install the 64bit version of Python 3, for instance from <a href="https://www.python.org/">https://www.python.org</a>.</span
          ><span class="sk-expandable" data-packager="pip" data-os="mac">Install Python 3 using <a href="https://brew.sh/">homebrew</a> (<code>brew install python</code>) or by manually installing the package from <a href="https://www.python.org">https://www.python.org</a>.</span
          ><span class="sk-expandable" data-packager="pip" data-os="linux">Install python3 and python3-pip using the package manager of the Linux Distribution.</span
-         ><span class="sk-expandable" data-packager="conda"><a href="https://docs.conda.io/projects/conda/en/latest/user-guide/install/">Install conda</a> (no administrator permission required).</span>
+         ><span class="sk-expandable" data-packager="conda"
+            >Install conda using the <a href="https://docs.conda.io/projects/conda/en/latest/user-guide/install/">Anaconda or miniconda</a>
+             installers or the <a href="https://github.com/conda-forge/miniforge#miniforge">miniforge</a> installers
+             (no administrator permission required for any of those).</span>
        </div>
 
 Then run:
@@ -77,9 +80,8 @@ Then run:
         ><span class="sk-expandable" data-packager="pip" data-os="mac" data-venv="no">pip install -U scikit-learn</span
         ><span class="sk-expandable" data-packager="pip" data-os="windows" data-venv="no">pip install -U scikit-learn</span
         ><span class="sk-expandable" data-packager="pip" data-os="linux" data-venv="no">pip3 install -U scikit-learn</span
-        ><span class="sk-expandable" data-packager="conda" data-venv="">conda create -n sklearn-env</span
-        ><span class="sk-expandable" data-packager="conda" data-venv="">conda activate sklearn-env</span
-        ><span class="sk-expandable" data-packager="conda">conda install scikit-learn </span
+        ><span class="sk-expandable" data-packager="conda">conda create -n sklearn-env -c conda-forge scikit-learn</span
+        ><span class="sk-expandable" data-packager="conda">conda activate sklearn-env</span
        ></code></pre></div>
 
 In order to check your installation you can use
@@ -105,19 +107,16 @@ In order to check your installation you
       ></code></pre></div>
   </div>
 
-
 Note that in order to avoid potential conflicts with other packages it is
-strongly recommended to use a virtual environment, e.g. python3 ``virtualenv``
-(see `python3 virtualenv documentation
-<https://docs.python.org/3/tutorial/venv.html>`_) or `conda environments
+strongly recommended to use a `virtual environment (venv)
+<https://docs.python.org/3/tutorial/venv.html>`_ or a `conda environment
 <https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html>`_.
 
-Using an isolated environment makes possible to install a specific version of
-scikit-learn and its dependencies independently of any previously installed
-Python packages.
-In particular under Linux is it discouraged to install pip packages alongside
-the packages managed by the package manager of the distribution
-(apt, dnf, pacman...).
+Using such an isolated environment makes it possible to install a specific
+version of scikit-learn with pip or conda and its dependencies independently of
+any previously installed Python packages. In particular under Linux it is
+discouraged to install pip packages alongside the packages managed by the
+package manager of the distribution (apt, dnf, pacman...).
 
 Note that you should always remember to activate the environment of your choice
 prior to running any Python command whenever you start a new terminal session.
@@ -128,21 +127,23 @@ and NumPy and SciPy are not recompiled f
 particular configurations of operating system and hardware (such as Linux on
 a Raspberry Pi).
 
-If you must install scikit-learn and its dependencies with pip, you can install
-it as ``scikit-learn[alldeps]``.
 
 Scikit-learn plotting capabilities (i.e., functions start with "plot\_"
-and classes end with "Display") require Matplotlib (>= 2.1.1). For running the
-examples Matplotlib >= 2.1.1 is required. A few examples require
-scikit-image >= 0.13, a few examples require pandas >= 0.18.0, some examples
-require seaborn >= 0.9.0.
+and classes end with "Display") require Matplotlib. The examples require
+Matplotlib and some examples require scikit-image, pandas, or seaborn. The
+minimum versions of Scikit-learn dependencies are listed below along with
+their purpose.
+
+.. include:: min_dependency_table.rst
 
 .. warning::
 
     Scikit-learn 0.20 was the last version to support Python 2.7 and Python 3.4.
     Scikit-learn 0.21 supported Python 3.5-3.7.
     Scikit-learn 0.22 supported Python 3.5-3.8.
-    Scikit-learn now requires Python 3.6 or newer.
+    Scikit-learn 0.23 - 0.24 require Python 3.6 or newer.
+    Scikit-learn 1.0 supported Python 3.7-3.10.
+    Scikit-learn 1.1 and later requires Python 3.8 or newer.
 
 
 .. note::
@@ -150,6 +151,28 @@ require seaborn >= 0.9.0.
    For installing on PyPy, PyPy3-v5.10+, Numpy 1.14.0+, and scipy 1.1.0+
    are required.
 
+.. _install_on_apple_silicon_m1:
+
+Installing on Apple Silicon M1 hardware
+=======================================
+
+The recently introduced `macos/arm64` platform (sometimes also known as
+`macos/aarch64`) requires the open source community to upgrade the build
+configuration and automation to properly support it.
+
+At the time of writing (January 2021), the only way to get a working
+installation of scikit-learn on this hardware is to install scikit-learn and its
+dependencies from the conda-forge distribution, for instance using the miniforge
+installers:
+
+https://github.com/conda-forge/miniforge
+
+The following issue tracks progress on making it possible to install
+scikit-learn from PyPI with pip:
+
+https://github.com/scikit-learn/scikit-learn/issues/19137
+
+
 .. _install_by_distribution:
 
 Third party distributions of scikit-learn
@@ -173,22 +196,24 @@ Arch Linux's package is provided through
 ``python-scikit-learn`` for Python.
 It can be installed by typing the following command:
 
-.. code-block:: none
+.. prompt:: bash $
 
-   $ sudo pacman -S python-scikit-learn
+  sudo pacman -S python-scikit-learn
 
 
 Debian/Ubuntu
 -------------
 
-The Debian/Ubuntu package is splitted in three different packages called
+The Debian/Ubuntu package is split in three different packages called
 ``python3-sklearn`` (python modules), ``python3-sklearn-lib`` (low-level
 implementations and bindings), ``python3-sklearn-doc`` (documentation).
 Only the Python 3 version is available in the Debian Buster (the more recent
 Debian distribution).
-Packages can be installed using ``apt-get``::
+Packages can be installed using ``apt-get``:
 
-    $ sudo apt-get install python3-sklearn python3-sklearn-lib python3-sklearn-doc
+.. prompt:: bash $
+
+  sudo apt-get install python3-sklearn python3-sklearn-lib python3-sklearn-doc
 
 
 Fedora
@@ -196,9 +221,11 @@ Fedora
 
 The Fedora package is called ``python3-scikit-learn`` for the python 3 version,
 the only one available in Fedora30.
-It can be installed using ``dnf``::
+It can be installed using ``dnf``:
+
+.. prompt:: bash $
 
-    $ sudo dnf install python3-scikit-learn
+  sudo dnf install python3-scikit-learn
 
 
 NetBSD
@@ -216,19 +243,20 @@ MacPorts for Mac OSX
 The MacPorts package is named ``py<XY>-scikits-learn``,
 where ``XY`` denotes the Python version.
 It can be installed by typing the following
-command::
+command:
 
-    $ sudo port install py36-scikit-learn
+.. prompt:: bash $
 
+  sudo port install py39-scikit-learn
 
-Canopy and Anaconda for all supported platforms
------------------------------------------------
 
-`Canopy
-<https://www.enthought.com/products/canopy>`_ and `Anaconda
-<https://www.anaconda.com/download>`_ both ship a recent
-version of scikit-learn, in addition to a large set of scientific python
-library for Windows, Mac OSX and Linux.
+Anaconda and Enthought Deployment Manager for all supported platforms
+---------------------------------------------------------------------
+
+`Anaconda <https://www.anaconda.com/download>`_ and
+`Enthought Deployment Manager <https://assets.enthought.com/downloads/>`_
+both ship with scikit-learn in addition to a large set of scientific
+Python libraries for Windows, Mac OSX and Linux.
 
 Anaconda offers scikit-learn as part of its free distribution.
 
@@ -236,9 +264,11 @@ Anaconda offers scikit-learn as part of
 Intel conda channel
 -------------------
 
-Intel maintains a dedicated conda channel that ships scikit-learn::
+Intel maintains a dedicated conda channel that ships scikit-learn:
+
+.. prompt:: bash $
 
-    $ conda install -c intel scikit-learn
+  conda install -c intel scikit-learn
 
 This version of scikit-learn comes with alternative solvers for some common
 estimators. Those solvers come from the DAAL C++ library and are optimized for
@@ -290,6 +320,8 @@ using the ``regedit`` tool:
 #. Edit the value of the ``LongPathsEnabled`` property of that key and set
    it to 1.
 
-#. Reinstall scikit-learn (ignoring the previous broken installation)::
+#. Reinstall scikit-learn (ignoring the previous broken installation):
+
+.. prompt:: bash $
 
-       pip install --exists-action=i scikit-learn
+    pip install --exists-action=i scikit-learn
diff -pruN 0.23.2-5/doc/logos/scikit-learn-logo-without-subtitle.svg 1.1.1-1/doc/logos/scikit-learn-logo-without-subtitle.svg
--- 0.23.2-5/doc/logos/scikit-learn-logo-without-subtitle.svg	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/logos/scikit-learn-logo-without-subtitle.svg	2022-05-19 12:16:26.448782400 +0000
@@ -0,0 +1 @@
+<svg height="675" viewBox="0 0 1251 675" width="1251" xmlns="http://www.w3.org/2000/svg"><path d="m959.940063 573.065979c152.410401-152.40155 177.740967-374.157013 56.573914-495.315063-121.148987-121.144471-342.895386-95.818482-495.296936 56.573974-152.40155 152.397125-108.314972 443.556091-56.564972 495.315003 41.818482 41.818542 342.895538 95.818542 495.287994-56.573914z" fill="#f89939"/><path d="m334.575043 352.849548c-88.415985-88.416046-217.089035-103.135528-287.401535-32.827575-70.294476 70.299041-55.597481 198.98999 32.836487 287.392578 88.434036 88.442993 257.377548 62.860473 287.383529 32.827453 24.281983-24.241455 55.624481-198.967468-32.818481-287.392456z" fill="#3499cd"/><g fill="#010101"><path d="m639.643494 535.711487c-15.619507 14.377502-29.322022 24.988525-41.09845 31.805969-11.77655 6.840088-23.008545 10.255493-33.696045 10.255493-12.293945 0-22.212036-4.765503-29.731445-14.300903-7.53302-9.544556-11.286011-22.342529-11.286011-38.443543 0-24.12445 5.228943-53.086486 15.686951-86.854461 10.440063-33.795044 23.152527-64.935089 38.083496-93.424561l43.780456-16.208954c1.37262-.459106 2.416565-.692962 3.109558-.692962 3.321045 0 6.065979 2.447876 8.172059 7.321411 2.128417 4.896057 3.199462 11.474945 3.199462 19.746002 0 23.440582-5.395507 46.134064-16.208923 68.080505-10.809082 21.955475-27.693054 45.386963-50.670044 70.321534-.922546 11.951904-1.381531 20.159912-1.381531 24.646484 0 10.003418 1.835999 17.918945 5.512513 23.782471 3.680969 5.872497 8.55896 8.788574 14.651977 8.788574 6.214478 0 12.81604-2.223145 19.827026-6.705139 6.997559-4.490906 17.685059-13.787964 32.044434-27.931458v19.813538zm-66.005982-67.378479c14.589051-16.222534 26.4375-34.416016 35.509461-54.544495 9.072082-20.137512 13.603576-37.458008 13.603576-51.97055 0-4.230011-.625549-7.667908-1.881042-10.250916-1.264527-2.587555-2.884461-3.888061-4.833008-3.888061-4.234436 0-10.421936 10.583953-18.526489 31.75653-8.104492 21.16803-16.060547 50.805024-23.872498 88.897492z"/><path 
d="m768.577576 535.711487c-14.589051 14.377502-27.684021 24.988525-39.294007 31.805969-11.610046 6.840088-24.40802 10.255493-38.43457 10.255493-15.628418 0-28.237427-4.999511-37.844971-14.984985-9.589416-10.012512-14.377441-23.156983-14.377441-39.478516 0-24.353943 8.4375-46.390472 25.348511-66.095947 16.875-19.714569 35.612976-29.565063 56.178039-29.565063 10.6875 0 19.237488 2.767608 25.681519 8.279998 6.434936 5.521576 9.652405 12.753083 9.652405 21.717072 0 23.791443-25.27649 43.083038-75.833924 57.910461 4.589966 22.396607 16.595947 33.610474 36.018006 33.610474 7.586853 0 14.818481-2.038513 21.707885-6.106506 6.907471-4.085938 17.298096-13.148926 31.203064-27.157471v19.809021zm-90.315064-31.878052c29.407532-8.279999 44.12262-23.552917 44.12262-45.845947 0-11.02948-4.027588-16.541992-12.06012-16.541992-7.586975 0-14.818481 5.764526-21.708008 17.325012-6.911926 11.542541-10.354492 26.554504-10.354492 45.062927z"/><path d="m952.654419 535.711487c-18.386963 17.464538-31.545044 28.853943-39.464905 34.146057-7.929138 5.282898-15.511597 7.919922-22.756531 7.919922-18.157531 0-26.712036-16.024536-25.681518-48.087036-11.488526 16.42511-22.095032 28.548095-31.805969 36.378051-9.702027 7.812012-19.723511 11.708985-30.078125 11.708985-10.097901 0-18.683899-4.729492-25.762391-14.210938-7.078613-9.481506-10.593017-21.109558-10.593017-34.91101 0-17.226014 4.729492-33.660035 14.201904-49.297577 9.49054-15.628449 21.640625-28.255463 36.459107-37.907929 14.818481-9.652588 27.931518-14.485473 39.293945-14.485473 14.368469 0 24.426086 6.610473 30.172485 19.817993l35.226013-19.467102h9.666016l-15.214538 50.494537c-7.811951 25.402435-11.731507 42.813019-11.731507 52.231476 0 9.877502 3.496582 14.818481 10.512024 14.818481 4.463989 0 9.404968-2.380432 14.80957-7.154968 5.404419-4.774536 12.97345-12.041992 22.738404-21.807007v19.813538zm-126.166443 9.490539c11.488403 0 22.31543-9.791992 32.503479-29.380615 10.170044-19.597412 15.250549-37.678406 15.250549-54.220398 
0-6.426025-1.449096-11.461487-4.306518-15.075012-2.884583-3.631531-6.731995-5.431519-11.547058-5.431519-11.497437 0-22.396424 9.765076-32.66095 29.303956-10.282532 19.539062-15.434998 37.521057-15.434998 53.937133 0 6.214417 1.53003 11.240906 4.571961 15.092896 3.041992 3.852051 6.903015 5.773559 11.623535 5.773559z"/><path d="m1081.412964 535.711487c-28.844971 28.264526-51.083985 42.40802-66.708008 42.40802-7.015442 0-12.9375-2.96106-17.752563-8.860596-4.814881-5.921875-7.240357-13.25238-7.240357-21.991455 0-16.200012 8.684937-37.907898 26.032471-65.146454-8.509583 4.369538-17.806458 7.402436-27.922547 9.130463-7.470031 13.787964-19.19696 28.615539-35.162963 44.455505h-3.955506v-15.493469c8.954956-9.305969 17.059448-19.30957 24.299988-29.99707-9.895569-4.369416-14.827454-10.862885-14.827454-19.46698 0-8.860474 3.005982-18.30597 9.053955-28.372437 6.03003-10.044006 14.328003-15.065979 24.907532-15.065979 8.963928 0 13.436951 4.580872 13.436951 13.778931 0 7.240539-2.583008 17.577026-7.762512 31.027588 19.071045-2.074524 35.734558-16.654602 49.990539-43.780518l15.678101-.693115-16.029053 44.12262c-6.660034 18.616455-10.970947 31.297424-12.919556 38.011444-1.948608 6.714019-2.934082 12.672027-2.934082 17.833587 0 4.832886 1.125 8.693848 3.357056 11.546875 2.241089 2.893555 5.265015 4.315552 9.053955 4.315552 4.130982 0 8.104614-1.412964 11.893555-4.212036 3.789062-2.839539 12.293945-10.615479 25.515014-23.368408v19.817932z"/><path d="m1250.676025 535.711487c-26.541015 28.053039-49.306518 42.065979-68.255981 42.065979-7.699463 0-13.905029-2.700073-18.616455-8.104492-4.720581-5.395508-7.074097-12.631531-7.074097-21.708008 0-12.294007 5.0625-31.085938 15.178589-56.353424 5.395508-13.563019 8.104492-22.194031 8.104492-25.857025 0-3.681092-1.448974-5.521576-4.306518-5.521576-1.606568 0-3.744019.810089-6.380982 2.407501-2.425537 1.606506-5.238037 3.865539-8.455566 6.732025-2.866455 2.636993-6.093018 5.854462-9.652466 9.63446-3.109497 3.244538-6.44397 6.916596-9.985596 
11.038514l-9.665893 11.214019c-4.243408 5.166047-6.889527 10.61554-7.920044 16.366456-1.732544 9.765075-2.875488 18.738037-3.456055 26.905517-.350952 6.075012-.517456 14.283081-.517456 24.646607l-38.092407 8.945861c-1.255493-15.511413-1.899048-27.062866-1.899048-34.636413 0-18.499451 2.155518-36.026917 6.470947-52.569031 4.306519-16.559998 11.223145-35.162994 20.767456-55.853943l42.048096-8.095581c-8.842407 23.791596-14.642944 42.511597-17.401489 56.17804 18.845947-21.023987 33.786011-35.577027 44.860473-43.69043 11.056519-8.104614 20.902466-12.136597 29.506592-12.136597 5.845337 0 10.7323 2.205109 14.625 6.619568 3.910401 4.418945 5.85437 9.967468 5.85437 16.600464 0 11.020508-4.940918 29.17804-14.80957 54.468018-6.785889 17.343017-10.178955 28.597534-10.178955 33.795044 0 6.916442 2.821655 10.372497 8.4646 10.372497 8.401489 0 22.009399-11.092529 40.787963-33.268555z"/></g><path d="m692.743469 295.258514h1013.589051v377.766022h-1013.589051z" fill="none"/><text fill="#fff" font-family="Helvetica" font-size="103.85775" x="688" y="370">scikit</text><path d="m1015.055969 620.905518h1464.444031v193.333557h-1464.444031z" fill="none"/></svg>
diff -pruN 0.23.2-5/doc/Makefile 1.1.1-1/doc/Makefile
--- 0.23.2-5/doc/Makefile	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/Makefile	2022-05-19 12:16:26.432781500 +0000
@@ -17,7 +17,7 @@ ALLSPHINXOPTS   = -T -d $(BUILDDIR)/doct
     $(EXAMPLES_PATTERN_OPTS) .
 
 
-.PHONY: help clean html dirhtml pickle json latex latexpdf changes linkcheck doctest optipng
+.PHONY: help clean html dirhtml ziphtml pickle json latex latexpdf changes linkcheck doctest optipng
 
 all: html-noplot
 
@@ -25,6 +25,7 @@ help:
 	@echo "Please use \`make <target>' where <target> is one of"
 	@echo "  html      to make standalone HTML files"
 	@echo "  dirhtml   to make HTML files named index.html in directories"
+	@echo "  ziphtml   to make a ZIP of the HTML"
 	@echo "  pickle    to make pickle files"
 	@echo "  json      to make JSON files"
 	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@@ -58,6 +59,19 @@ dirhtml:
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 
+ziphtml:
+	@if [ ! -d "$(BUILDDIR)/html/stable/" ]; then \
+		make html; \
+	fi
+	# Optimize the images to reduce the size of the ZIP
+	optipng $(BUILDDIR)/html/stable/_images/*.png
+	# Exclude the output directory to avoid infinite recursion
+	cd $(BUILDDIR)/html/stable; \
+	zip -q -x _downloads \
+	       -r _downloads/scikit-learn-docs.zip .
+	@echo
+	@echo "Build finished. The ZIP of the HTML is in $(BUILDDIR)/html/stable/_downloads."
+
 pickle:
 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 	@echo
@@ -106,5 +120,4 @@ optipng:
 	find _build auto_examples */generated -name '*.png' -print0 \
 	  | xargs -0 -n 1 -P 4 optipng -o10
 
-dist: html latexpdf
-	cp _build/latex/user_guide.pdf _build/html/stable/_downloads/scikit-learn-docs.pdf
+dist: html ziphtml
diff -pruN 0.23.2-5/doc/model_persistence.rst 1.1.1-1/doc/model_persistence.rst
--- 0.23.2-5/doc/model_persistence.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/model_persistence.rst	2022-05-19 12:16:26.448782400 +0000
@@ -0,0 +1,125 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
+.. _model_persistence:
+
+=================
+Model persistence
+=================
+
+After training a scikit-learn model, it is desirable to have a way to persist
+the model for future use without having to retrain. The following sections give
+you some hints on how to persist a scikit-learn model.
+
+Python specific serialization
+-----------------------------
+
+It is possible to save a model in scikit-learn by using Python's built-in
+persistence model, namely `pickle
+<https://docs.python.org/3/library/pickle.html>`_::
+
+  >>> from sklearn import svm
+  >>> from sklearn import datasets
+  >>> clf = svm.SVC()
+  >>> X, y= datasets.load_iris(return_X_y=True)
+  >>> clf.fit(X, y)
+  SVC()
+
+  >>> import pickle
+  >>> s = pickle.dumps(clf)
+  >>> clf2 = pickle.loads(s)
+  >>> clf2.predict(X[0:1])
+  array([0])
+  >>> y[0]
+  0
+
+In the specific case of scikit-learn, it may be better to use joblib's
+replacement of pickle (``dump`` & ``load``), which is more efficient on
+objects that carry large numpy arrays internally as is often the case for
+fitted scikit-learn estimators, but can only pickle to the disk and not to a
+string::
+
+  >>> from joblib import dump, load
+  >>> dump(clf, 'filename.joblib') # doctest: +SKIP
+
+Later you can load back the pickled model (possibly in another Python process)
+with::
+
+  >>> clf = load('filename.joblib') # doctest:+SKIP
+
+.. note::
+
+   ``dump`` and ``load`` functions also accept file-like object
+   instead of filenames. More information on data persistence with Joblib is
+   available `here
+   <https://joblib.readthedocs.io/en/latest/persistence.html>`_.
+
+.. _persistence_limitations:
+
+Security & maintainability limitations
+......................................
+
+pickle (and joblib by extension), has some issues regarding maintainability
+and security. Because of this,
+
+* Never unpickle untrusted data as it could lead to malicious code being
+  executed upon loading.
+* While models saved using one version of scikit-learn might load in
+  other versions, this is entirely unsupported and inadvisable. It should
+  also be kept in mind that operations performed on such data could give
+  different and unexpected results.
+
+In order to rebuild a similar model with future versions of scikit-learn,
+additional metadata should be saved along the pickled model:
+
+* The training data, e.g. a reference to an immutable snapshot
+* The python source code used to generate the model
+* The versions of scikit-learn and its dependencies
+* The cross validation score obtained on the training data
+
+This should make it possible to check that the cross-validation score is in the
+same range as before.
+
+Aside from a few exceptions, pickled models should be portable across
+architectures assuming the same versions of dependencies and Python are used.
+If you encounter an estimator that is not portable please open an issue on
+GitHub. Pickled models are often deployed in production using containers, like
+Docker, in order to freeze the environment and dependencies.
+
+If you want to know more about these issues and explore other possible
+serialization methods, please refer to this
+`talk by Alex Gaynor
+<https://pyvideo.org/video/2566/pickles-are-for-delis-not-software>`_.
+
+Interoperable formats
+---------------------
+
+For reproducibility and quality control needs, when different architectures
+and environments should be taken into account, exporting the model in
+`Open Neural Network
+Exchange <https://onnx.ai/>`_ format or `Predictive Model Markup Language
+(PMML) <http://dmg.org/pmml/v4-4-1/GeneralStructure.html>`_ format
+might be a better approach than using `pickle` alone.
+These are helpful where you may want to use your model for prediction in a
+different environment from where the model was trained.
+
+ONNX is a binary serialization of the model. It has been developed to improve
+the usability of the interoperable representation of data models.
+It aims to facilitate the conversion of the data
+models between different machine learning frameworks, and to improve their
+portability on different computing architectures. More details are available
+from the `ONNX tutorial <https://onnx.ai/get-started.html>`_.
+To convert scikit-learn model to ONNX a specific tool `sklearn-onnx
+<http://onnx.ai/sklearn-onnx/>`_ has been developed.
+
+PMML is an implementation of the `XML
+<https://en.wikipedia.org/wiki/XML>`_ document standard
+defined to represent data models together with the data used to generate them.
+Being human and machine readable,
+PMML is a good option for model validation on different platforms and
+long term archiving. On the other hand, as XML in general, its verbosity does
+not help in production when performance is critical.
+To convert scikit-learn model to PMML you can use for example `sklearn2pmml
+<https://github.com/jpmml/sklearn2pmml>`_ distributed under the Affero GPLv3
+license.
diff -pruN 0.23.2-5/doc/model_selection.rst 1.1.1-1/doc/model_selection.rst
--- 0.23.2-5/doc/model_selection.rst	2020-08-04 12:12:58.872675400 +0000
+++ 1.1.1-1/doc/model_selection.rst	2022-05-19 12:16:26.448782400 +0000
@@ -1,3 +1,7 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
 .. include:: includes/big_toc_css.rst
 
 .. _model_selection:
@@ -11,5 +15,4 @@ Model selection and evaluation
     modules/cross_validation
     modules/grid_search
     modules/model_evaluation
-    modules/model_persistence
     modules/learning_curve
diff -pruN 0.23.2-5/doc/modules/biclustering.rst 1.1.1-1/doc/modules/biclustering.rst
--- 0.23.2-5/doc/modules/biclustering.rst	2020-08-04 12:12:58.872675400 +0000
+++ 1.1.1-1/doc/modules/biclustering.rst	2022-05-19 12:16:26.448782400 +0000
@@ -160,9 +160,9 @@ and the remaining ``n_columns`` labels p
 
 .. topic:: References:
 
- * Dhillon, Inderjit S, 2001. `Co-clustering documents and words using
+ * Dhillon, Inderjit S, 2001. :doi:`Co-clustering documents and words using
    bipartite spectral graph partitioning
-   <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.140.3011>`__.
+   <10.1145/502512.502550>`
 
 
 .. _spectral_biclustering:
@@ -243,9 +243,9 @@ clustering this :math:`n \times q` matri
 
 .. topic:: References:
 
- * Kluger, Yuval, et. al., 2003. `Spectral biclustering of microarray
+ * Kluger, Yuval, et. al., 2003. :doi:`Spectral biclustering of microarray
    data: coclustering genes and conditions
-   <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.1608>`__.
+   <10.1101/gr.648603>`
 
 
 .. _biclustering_evaluation:
diff -pruN 0.23.2-5/doc/modules/calibration.rst 1.1.1-1/doc/modules/calibration.rst
--- 0.23.2-5/doc/modules/calibration.rst	2020-08-04 12:12:58.872675400 +0000
+++ 1.1.1-1/doc/modules/calibration.rst	2022-05-19 12:16:26.448782400 +0000
@@ -11,24 +11,44 @@ When performing classification you often
 label, but also obtain a probability of the respective label. This probability
 gives you some kind of confidence on the prediction. Some models can give you
 poor estimates of the class probabilities and some even do not support
-probability prediction. The calibration module allows you to better calibrate
+probability prediction (e.g., some instances of
+:class:`~sklearn.linear_model.SGDClassifier`).
+The calibration module allows you to better calibrate
 the probabilities of a given model, or to add support for probability
 prediction.
 
 Well calibrated classifiers are probabilistic classifiers for which the output
-of the predict_proba method can be directly interpreted as a confidence level.
+of the :term:`predict_proba` method can be directly interpreted as a confidence
+level.
 For instance, a well calibrated (binary) classifier should classify the samples
-such that among the samples to which it gave a predict_proba value close to 0.8,
+such that among the samples to which it gave a :term:`predict_proba` value
+close to 0.8,
 approximately 80% actually belong to the positive class.
 
+.. _calibration_curve:
+
 Calibration curves
 ------------------
 
-The following plot compares how well the probabilistic predictions of
-different classifiers are calibrated, using :func:`calibration_curve`.
+Calibration curves (also known as reliability diagrams) compare how well the
+probabilistic predictions of a binary classifier are calibrated. They plot
+the true frequency of the positive label against its predicted probability,
+for binned predictions.
 The x axis represents the average predicted probability in each bin. The
 y axis is the *fraction of positives*, i.e. the proportion of samples whose
-class is the positive class (in each bin).
+class is the positive class (in each bin). The top calibration curve plot
+is created with :func:`CalibrationDisplay.from_estimator`, which uses
+:func:`calibration_curve` to calculate the per bin average predicted
+probabilities and fraction of positives.
+:func:`CalibrationDisplay.from_estimator`
+takes as input a fitted classifier, which is used to calculate the predicted
+probabilities. The classifier thus must have a :term:`predict_proba` method. For
+the few classifiers that do not have a :term:`predict_proba` method, it is
+possible to use :class:`CalibratedClassifierCV` to calibrate the classifier
+outputs to probabilities.
+
+The bottom histogram gives some insight into the behavior of each classifier
+by showing the number of samples in each predicted probability bin.
 
 .. figure:: ../auto_examples/calibration/images/sphx_glr_plot_compare_calibration_001.png
    :target: ../auto_examples/calibration/plot_compare_calibration.html
@@ -37,7 +57,7 @@ class is the positive class (in each bin
 .. currentmodule:: sklearn.linear_model
 
 :class:`LogisticRegression` returns well calibrated predictions by default as it directly
-optimizes log-loss. In contrast, the other methods return biased probabilities;
+optimizes :ref:`log_loss`. In contrast, the other methods return biased probabilities;
 with different biases per method:
 
 .. currentmodule:: sklearn.naive_bayes
@@ -73,68 +93,153 @@ to 0 or 1 typically.
 .. currentmodule:: sklearn.svm
 
 Linear Support Vector Classification (:class:`LinearSVC`) shows an even more
-sigmoid curve as the RandomForestClassifier, which is typical for
-maximum-margin methods (compare Niculescu-Mizil and Caruana [1]_), which
-focus on hard samples that are close to the decision boundary (the support
-vectors).
+sigmoid curve than :class:`~sklearn.ensemble.RandomForestClassifier`, which is
+typical for maximum-margin methods (compare Niculescu-Mizil and Caruana [1]_),
+which focus on difficult to classify samples that are close to the decision
+boundary (the support vectors).
 
 Calibrating a classifier
 ------------------------
 
 .. currentmodule:: sklearn.calibration
 
-Calibrating a classifier consists in fitting a regressor (called a
+Calibrating a classifier consists of fitting a regressor (called a
 *calibrator*) that maps the output of the classifier (as given by
-:term:`predict` or :term:`predict_proba`) to a calibrated probability in [0,
-1]. Denoting the output of the classifier for a given sample by :math:`f_i`,
+:term:`decision_function` or :term:`predict_proba`) to a calibrated probability
+in [0, 1]. Denoting the output of the classifier for a given sample by :math:`f_i`,
 the calibrator tries to predict :math:`p(y_i = 1 | f_i)`.
 
-The samples that are used to train the calibrator should not be used to
-train the target classifier.
+The samples that are used to fit the calibrator should not be the same
+samples used to fit the classifier, as this would introduce bias.
+This is because performance of the classifier on its training data would be
+better than for novel data. Using the classifier output of training data
+to fit the calibrator would thus result in a biased calibrator that maps to
+probabilities closer to 0 and 1 than it should.
 
 Usage
 -----
 
 The :class:`CalibratedClassifierCV` class is used to calibrate a classifier.
 
-:class:`CalibratedClassifierCV` uses a cross-validation approach to fit both
-the classifier and the regressor. For each of the k `(trainset, testset)`
-couple, a classifier is trained on the train set, and its predictions on the
-test set are used to fit a regressor. We end up with k
-`(classifier, regressor)` couples where each regressor maps the output of
-its corresponding classifier into [0, 1]. Each couple is exposed in the
-`calibrated_classifiers_` attribute, where each entry is a calibrated
+:class:`CalibratedClassifierCV` uses a cross-validation approach to ensure
+unbiased data is always used to fit the calibrator. The data is split into k
+`(train_set, test_set)` couples (as determined by `cv`). When `ensemble=True`
+(default), the following procedure is repeated independently for each
+cross-validation split: a clone of `base_estimator` is first trained on the
+train subset. Then its predictions on the test subset are used to fit a
+calibrator (either a sigmoid or isotonic regressor). This results in an
+ensemble of k `(classifier, calibrator)` couples where each calibrator maps
+the output of its corresponding classifier into [0, 1]. Each couple is exposed
+in the `calibrated_classifiers_` attribute, where each entry is a calibrated
 classifier with a :term:`predict_proba` method that outputs calibrated
 probabilities. The output of :term:`predict_proba` for the main
 :class:`CalibratedClassifierCV` instance corresponds to the average of the
-predicted probabilities of the `k` estimators in the
-`calibrated_classifiers_` list. The output of :term:`predict` is the class
-that has the highest probability.
-
-The regressor that is used for calibration depends on the `method`
-parameter. `'sigmoid'` corresponds to a parametric approach based on Platt's
-logistic model [3]_, i.e. :math:`p(y_i = 1 | f_i)` is modeled as
-:math:`\sigma(A f_i + B)` where :math:`\sigma` is the logistic function, and
-:math:`A` and :math:`B` are real numbers to be determined when fitting the
-regressor via maximum likelihood. `'isotonic'` will instead fit a
-non-parametric isotonic regressor, which outputs a step-wise non-decreasing
-function (see :mod:`sklearn.isotonic`).
-
-An already fitted classifier can be calibrated by setting `cv="prefit"`. In
-this case, the data is only used to fit the regressor. It is up to the user
+predicted probabilities of the `k` estimators in the `calibrated_classifiers_`
+list. The output of :term:`predict` is the class that has the highest
+probability.
+
+When `ensemble=False`, cross-validation is used to obtain 'unbiased'
+predictions for all the data, via
+:func:`~sklearn.model_selection.cross_val_predict`.
+These unbiased predictions are then used to train the calibrator. The attribute
+`calibrated_classifiers_` consists of only one `(classifier, calibrator)`
+couple where the classifier is the `base_estimator` trained on all the data.
+In this case the output of :term:`predict_proba` for
+:class:`CalibratedClassifierCV` is the predicted probabilities obtained
+from the single `(classifier, calibrator)` couple.
+
+The main advantage of `ensemble=True` is to benefit from the traditional
+ensembling effect (similar to :ref:`bagging`). The resulting ensemble should
+both be well calibrated and slightly more accurate than with `ensemble=False`.
+The main advantage of using `ensemble=False` is computational: it reduces the
+overall fit time by training only a single base classifier and calibrator
+pair, decreases the final model size and increases prediction speed.
+
+Alternatively an already fitted classifier can be calibrated by setting
+`cv="prefit"`. In this case, the data is not split and all of it is used to
+fit the regressor. It is up to the user to
 make sure that the data used for fitting the classifier is disjoint from the
 data used for fitting the regressor.
 
-:class:`CalibratedClassifierCV` can calibrate probabilities in a multiclass
-setting if the base estimator supports multiclass predictions. The classifier
-is calibrated first for each class separately in a one-vs-rest fashion [4]_.
-When predicting probabilities, the calibrated probabilities for each class
+:func:`sklearn.metrics.brier_score_loss` may be used to assess how
+well a classifier is calibrated. However, this metric should be used with care
+because a lower Brier score does not always mean a better calibrated model.
+This is because the Brier score metric is a combination of calibration loss
+and refinement loss. Calibration loss is defined as the mean squared deviation
+from empirical probabilities derived from the slope of ROC segments.
+Refinement loss can be defined as the expected optimal loss as measured by the
+area under the optimal cost curve. As refinement loss can change
+independently from calibration loss, a lower Brier score does not necessarily
+mean a better calibrated model.
+
+:class:`CalibratedClassifierCV` supports the use of two 'calibration'
+regressors: 'sigmoid' and 'isotonic'.
+
+.. _sigmoid_regressor:
+
+Sigmoid
+^^^^^^^
+
+The sigmoid regressor is based on Platt's logistic model [3]_:
+
+.. math::
+       p(y_i = 1 | f_i) = \frac{1}{1 + \exp(A f_i + B)}
+
+where :math:`y_i` is the true label of sample :math:`i` and :math:`f_i`
+is the output of the un-calibrated classifier for sample :math:`i`. :math:`A`
+and :math:`B` are real numbers to be determined when fitting the regressor via
+maximum likelihood.
+
+The sigmoid method assumes the :ref:`calibration curve <calibration_curve>`
+can be corrected by applying a sigmoid function to the raw predictions. This
+assumption has been empirically justified in the case of :ref:`svm` with
+common kernel functions on various benchmark datasets in section 2.1 of Platt
+1999 [3]_ but does not necessarily hold in general. Additionally, the
+logistic model works best if the calibration error is symmetrical, meaning
+the classifier output for each binary class is normally distributed with
+the same variance [6]_. This can be a problem for highly imbalanced
+classification problems, where outputs do not have equal variance.
+
+In general this method is most effective when the un-calibrated model is
+under-confident and has similar calibration errors for both high and low
+outputs.
+
+Isotonic
+^^^^^^^^
+
+The 'isotonic' method fits a non-parametric isotonic regressor, which outputs
+a step-wise non-decreasing function (see :mod:`sklearn.isotonic`). It
+minimizes:
+
+.. math::
+       \sum_{i=1}^{n} (y_i - \hat{f}_i)^2
+
+subject to :math:`\hat{f}_i >= \hat{f}_j` whenever
+:math:`f_i >= f_j`. :math:`y_i` is the true
+label of sample :math:`i` and :math:`\hat{f}_i` is the output of the
+calibrated classifier for sample :math:`i` (i.e., the calibrated probability).
+This method is more general when compared to 'sigmoid' as the only restriction
+is that the mapping function is monotonically increasing. It is thus more
+powerful as it can correct any monotonic distortion of the un-calibrated model.
+However, it is more prone to overfitting, especially on small datasets [5]_.
+
+Overall, 'isotonic' will perform as well as or better than 'sigmoid' when
+there is enough data (greater than ~ 1000 samples) to avoid overfitting [1]_.
+
+Multiclass support
+^^^^^^^^^^^^^^^^^^
+
+Both isotonic and sigmoid regressors only
+support 1-dimensional data (e.g., binary classification output) but are
+extended for multiclass classification if the `base_estimator` supports
+multiclass predictions. For multiclass predictions,
+:class:`CalibratedClassifierCV` calibrates for
+each class separately in a :ref:`ovr_classification` fashion [4]_. When
+predicting
+probabilities, the calibrated probabilities for each class
 are predicted separately. As those probabilities do not necessarily sum to
 one, a postprocessing is performed to normalize them.
 
-The :func:`sklearn.metrics.brier_score_loss` may be used to evaluate how
-well a classifier is calibrated.
-
 .. topic:: Examples:
 
    * :ref:`sphx_glr_auto_examples_calibration_plot_calibration_curve.py`
@@ -144,15 +249,31 @@ well a classifier is calibrated.
 
 .. topic:: References:
 
-    .. [1] Predicting Good Probabilities with Supervised Learning,
+    .. [1] `Predicting Good Probabilities with Supervised Learning
+           <https://www.cs.cornell.edu/~alexn/papers/calibration.icml05.crc.rev3.pdf>`_,
            A. Niculescu-Mizil & R. Caruana, ICML 2005
 
-    .. [2] On the combination of forecast probabilities for
-           consecutive precipitation periods. Wea. Forecasting, 5, 640–650.,
-           Wilks, D. S., 1990a
-
-    .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons
-           to Regularized Likelihood Methods, J. Platt, (1999)
-
-    .. [4] Transforming Classifier Scores into Accurate Multiclass
-           Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)
+    .. [2] `On the combination of forecast probabilities for
+           consecutive precipitation periods.
+           <https://journals.ametsoc.org/waf/article/5/4/640/40179>`_
+           Wea. Forecasting, 5, 640–650., Wilks, D. S., 1990a
+
+    .. [3] `Probabilistic Outputs for Support Vector Machines and Comparisons
+           to Regularized Likelihood Methods.
+           <https://www.cs.colorado.edu/~mozer/Teaching/syllabi/6622/papers/Platt1999.pdf>`_
+           J. Platt, (1999)
+
+    .. [4] `Transforming Classifier Scores into Accurate Multiclass
+           Probability Estimates.
+           <https://dl.acm.org/doi/pdf/10.1145/775047.775151>`_
+           B. Zadrozny & C. Elkan, (KDD 2002)
+
+    .. [5] `Predicting accurate probabilities with a ranking loss.
+           <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4180410/>`_
+           Menon AK, Jiang XJ, Vembu S, Elkan C, Ohno-Machado L.
+           Proc Int Conf Mach Learn. 2012;2012:703-710
+
+    .. [6] `Beyond sigmoids: How to obtain well-calibrated probabilities from
+           binary classifiers with beta calibration
+           <https://projecteuclid.org/euclid.ejs/1513306867>`_
+           Kull, M., Silva Filho, T. M., & Flach, P. (2017).
diff -pruN 0.23.2-5/doc/modules/classes.rst 1.1.1-1/doc/modules/classes.rst
--- 0.23.2-5/doc/modules/classes.rst	2020-08-04 12:12:58.872675400 +0000
+++ 1.1.1-1/doc/modules/classes.rst	2022-05-19 12:16:26.448782400 +0000
@@ -23,6 +23,7 @@ Base classes
 .. currentmodule:: sklearn
 
 .. autosummary::
+   :nosignatures:
    :toctree: generated/
    :template: class.rst
 
@@ -103,6 +104,7 @@ Classes
    cluster.DBSCAN
    cluster.FeatureAgglomeration
    cluster.KMeans
+   cluster.BisectingKMeans
    cluster.MiniBatchKMeans
    cluster.MeanShift
    cluster.OPTICS
@@ -123,6 +125,7 @@ Functions
    cluster.dbscan
    cluster.estimate_bandwidth
    cluster.k_means
+   cluster.kmeans_plusplus
    cluster.mean_shift
    cluster.spectral_clustering
    cluster.ward_tree
@@ -317,6 +320,7 @@ Samples generator
    decomposition.MiniBatchDictionaryLearning
    decomposition.MiniBatchSparsePCA
    decomposition.NMF
+   decomposition.MiniBatchNMF
    decomposition.PCA
    decomposition.SparsePCA
    decomposition.SparseCoder
@@ -431,16 +435,14 @@ Samples generator
 
 .. autosummary::
    :toctree: generated/
-   :template: class_without_init.rst
+   :template: class.rst
 
-   exceptions.ChangedBehaviorWarning
    exceptions.ConvergenceWarning
    exceptions.DataConversionWarning
    exceptions.DataDimensionalityWarning
    exceptions.EfficiencyWarning
    exceptions.FitFailedWarning
    exceptions.NotFittedError
-   exceptions.NonBLASDotWarning
    exceptions.UndefinedMetricWarning
 
 
@@ -458,6 +460,7 @@ Samples generator
 
    experimental.enable_hist_gradient_boosting
    experimental.enable_iterative_imputer
+   experimental.enable_halving_search_cv
 
 
 .. _feature_extraction_ref:
@@ -547,6 +550,7 @@ From text
    feature_selection.SelectFdr
    feature_selection.SelectFromModel
    feature_selection.SelectFwe
+   feature_selection.SequentialFeatureSelector
    feature_selection.RFE
    feature_selection.RFECV
    feature_selection.VarianceThreshold
@@ -558,6 +562,7 @@ From text
    feature_selection.chi2
    feature_selection.f_classif
    feature_selection.f_regression
+   feature_selection.r_regression
    feature_selection.mutual_info_classif
    feature_selection.mutual_info_regression
 
@@ -629,7 +634,7 @@ Kernels:
 
 .. _inspection_ref:
 
-:mod:`sklearn.inspection`: inspection
+:mod:`sklearn.inspection`: Inspection
 =====================================
 
 .. automodule:: sklearn.inspection
@@ -654,6 +659,7 @@ Plotting
    :toctree: generated/
    :template: class.rst
 
+   inspection.DecisionBoundaryDisplay
    inspection.PartialDependenceDisplay
 
 .. autosummary::
@@ -691,8 +697,8 @@ Plotting
 
 .. _kernel_approximation_ref:
 
-:mod:`sklearn.kernel_approximation` Kernel Approximation
-========================================================
+:mod:`sklearn.kernel_approximation`: Kernel Approximation
+=========================================================
 
 .. automodule:: sklearn.kernel_approximation
    :no-members:
@@ -708,13 +714,14 @@ Plotting
 
    kernel_approximation.AdditiveChi2Sampler
    kernel_approximation.Nystroem
+   kernel_approximation.PolynomialCountSketch
    kernel_approximation.RBFSampler
    kernel_approximation.SkewedChi2Sampler
 
 .. _kernel_ridge_ref:
 
-:mod:`sklearn.kernel_ridge` Kernel Ridge Regression
-========================================================
+:mod:`sklearn.kernel_ridge`: Kernel Ridge Regression
+====================================================
 
 .. automodule:: sklearn.kernel_ridge
    :no-members:
@@ -759,6 +766,7 @@ Linear classifiers
    linear_model.RidgeClassifier
    linear_model.RidgeClassifierCV
    linear_model.SGDClassifier
+   linear_model.SGDOneClassSVM
 
 Classical linear regressors
 ---------------------------
@@ -834,6 +842,7 @@ Any estimator using the Huber loss would
    :template: class.rst
 
    linear_model.HuberRegressor
+   linear_model.QuantileRegressor
    linear_model.RANSACRegressor
    linear_model.TheilSenRegressor
 
@@ -900,7 +909,7 @@ Miscellaneous
     manifold.smacof
     manifold.spectral_embedding
     manifold.trustworthiness
-	
+
 
 .. _metrics_ref:
 
@@ -927,6 +936,7 @@ details.
 
    metrics.check_scoring
    metrics.get_scorer
+   metrics.get_scorer_names
    metrics.make_scorer
 
 Classification metrics
@@ -948,6 +958,7 @@ details.
    metrics.cohen_kappa_score
    metrics.confusion_matrix
    metrics.dcg_score
+   metrics.det_curve
    metrics.f1_score
    metrics.fbeta_score
    metrics.hamming_loss
@@ -963,6 +974,7 @@ details.
    metrics.recall_score
    metrics.roc_auc_score
    metrics.roc_curve
+   metrics.top_k_accuracy_score
    metrics.zero_one_loss
 
 Regression metrics
@@ -981,10 +993,15 @@ details.
    metrics.mean_squared_error
    metrics.mean_squared_log_error
    metrics.median_absolute_error
+   metrics.mean_absolute_percentage_error
    metrics.r2_score
    metrics.mean_poisson_deviance
    metrics.mean_gamma_deviance
    metrics.mean_tweedie_deviance
+   metrics.d2_tweedie_score
+   metrics.mean_pinball_loss
+   metrics.d2_pinball_score
+   metrics.d2_absolute_error_score
 
 Multilabel ranking metrics
 --------------------------
@@ -1022,11 +1039,13 @@ details.
    metrics.davies_bouldin_score
    metrics.completeness_score
    metrics.cluster.contingency_matrix
+   metrics.cluster.pair_confusion_matrix
    metrics.fowlkes_mallows_score
    metrics.homogeneity_completeness_v_measure
    metrics.homogeneity_score
    metrics.mutual_info_score
    metrics.normalized_mutual_info_score
+   metrics.rand_score
    metrics.silhouette_score
    metrics.silhouette_samples
    metrics.v_measure_score
@@ -1045,6 +1064,16 @@ further details.
 
    metrics.consensus_score
 
+Distance metrics
+----------------
+
+.. currentmodule:: sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   metrics.DistanceMetric
 
 Pairwise metrics
 ----------------
@@ -1099,6 +1128,7 @@ See the :ref:`visualizations` section of
    :template: function.rst
 
    metrics.plot_confusion_matrix
+   metrics.plot_det_curve
    metrics.plot_precision_recall_curve
    metrics.plot_roc_curve
 
@@ -1107,9 +1137,10 @@ See the :ref:`visualizations` section of
    :template: class.rst
 
    metrics.ConfusionMatrixDisplay
+   metrics.DetCurveDisplay
    metrics.PrecisionRecallDisplay
    metrics.RocCurveDisplay
-
+   calibration.CalibrationDisplay
 
 .. _mixture_ref:
 
@@ -1165,6 +1196,7 @@ Splitter Classes
    model_selection.ShuffleSplit
    model_selection.StratifiedKFold
    model_selection.StratifiedShuffleSplit
+   model_selection.StratifiedGroupKFold
    model_selection.TimeSeriesSplit
 
 Splitter Functions
@@ -1179,6 +1211,8 @@ Splitter Functions
    model_selection.check_cv
    model_selection.train_test_split
 
+.. _hyper_parameter_optimizers:
+
 Hyper-parameter optimizers
 --------------------------
 
@@ -1189,9 +1223,11 @@ Hyper-parameter optimizers
    :template: class.rst
 
    model_selection.GridSearchCV
+   model_selection.HalvingGridSearchCV
    model_selection.ParameterGrid
    model_selection.ParameterSampler
    model_selection.RandomizedSearchCV
+   model_selection.HalvingRandomSearchCV
 
 
 Model validation
@@ -1212,14 +1248,14 @@ Model validation
 
 .. _multiclass_ref:
 
-:mod:`sklearn.multiclass`: Multiclass and multilabel classification
-===================================================================
+:mod:`sklearn.multiclass`: Multiclass classification
+====================================================
 
 .. automodule:: sklearn.multiclass
    :no-members:
    :no-inherited-members:
 
-**User guide:** See the :ref:`multiclass` section for further details.
+**User guide:** See the :ref:`multiclass_classification` section for further details.
 
 .. currentmodule:: sklearn
 
@@ -1240,7 +1276,9 @@ Model validation
    :no-members:
    :no-inherited-members:
 
-**User guide:** See the :ref:`multiclass` section for further details.
+**User guide:** See the :ref:`multilabel_classification`,
+:ref:`multiclass_multioutput_classification`, and
+:ref:`multioutput_regression` sections for further details.
 
 .. currentmodule:: sklearn
 
@@ -1295,7 +1333,6 @@ Model validation
    :template: class.rst
 
    neighbors.BallTree
-   neighbors.DistanceMetric
    neighbors.KDTree
    neighbors.KernelDensity
    neighbors.KNeighborsClassifier
@@ -1319,7 +1356,7 @@ Model validation
 .. _neural_network_ref:
 
 :mod:`sklearn.neural_network`: Neural network models
-=====================================================
+====================================================
 
 .. automodule:: sklearn.neural_network
    :no-members:
@@ -1398,6 +1435,7 @@ details.
    preprocessing.PowerTransformer
    preprocessing.QuantileTransformer
    preprocessing.RobustScaler
+   preprocessing.SplineTransformer
    preprocessing.StandardScaler
 
 .. autosummary::
@@ -1445,7 +1483,7 @@ details.
 
 .. _semi_supervised_ref:
 
-:mod:`sklearn.semi_supervised` Semi-Supervised Learning
+:mod:`sklearn.semi_supervised`: Semi-Supervised Learning
 ========================================================
 
 .. automodule:: sklearn.semi_supervised
@@ -1462,6 +1500,7 @@ details.
 
    semi_supervised.LabelPropagation
    semi_supervised.LabelSpreading
+   semi_supervised.SelfTrainingClassifier
 
 
 .. _svm_ref:
@@ -1553,12 +1592,17 @@ Plotting
 
 .. autosummary::
    :toctree: generated/
+   :template: class.rst
+
+   utils.Bunch
+
+.. autosummary::
+   :toctree: generated/
    :template: function.rst
 
    utils.arrayfuncs.min_pos
    utils.as_float_array
    utils.assert_all_finite
-   utils.Bunch
    utils.check_X_y
    utils.check_array
    utils.check_scalar
@@ -1576,11 +1620,11 @@ Plotting
    utils.extmath.fast_logdet
    utils.extmath.density
    utils.extmath.weighted_mode
+   utils.gen_batches
    utils.gen_even_slices
    utils.graph.single_source_shortest_path_length
-   utils.graph_shortest_path.graph_shortest_path
    utils.indexable
-   utils.metaestimators.if_delegate_has_method
+   utils.metaestimators.available_if
    utils.multiclass.type_of_target
    utils.multiclass.is_multilabel
    utils.multiclass.unique_labels
@@ -1620,12 +1664,11 @@ Utilities from joblib:
 Recently deprecated
 ===================
 
-To be removed in 0.24
----------------------
+To be removed in 1.3
+--------------------
 
 .. autosummary::
    :toctree: generated/
-   :template: deprecated_function.rst
+   :template: function.rst
 
-   model_selection.fit_grid_point
-   utils.safe_indexing
+   utils.metaestimators.if_delegate_has_method
diff -pruN 0.23.2-5/doc/modules/clustering.rst 1.1.1-1/doc/modules/clustering.rst
--- 0.23.2-5/doc/modules/clustering.rst	2020-08-04 12:12:58.872675400 +0000
+++ 1.1.1-1/doc/modules/clustering.rst	2022-05-19 12:16:26.448782400 +0000
@@ -19,11 +19,11 @@ data can be found in the ``labels_`` att
 
     One important thing to note is that the algorithms implemented in
     this module can take different kinds of matrix as input. All the
-    methods accept standard data matrices of shape ``[n_samples, n_features]``.
+    methods accept standard data matrices of shape ``(n_samples, n_features)``.
     These can be obtained from the classes in the :mod:`sklearn.feature_extraction`
     module. For :class:`AffinityPropagation`, :class:`SpectralClustering`
     and :class:`DBSCAN` one can also input similarity matrices of shape
-    ``[n_samples, n_samples]``. These can be obtained from the functions
+    ``(n_samples, n_samples)``. These can be obtained from the functions
     in the :mod:`sklearn.metrics.pairwise` module.
 
 Overview of clustering methods
@@ -51,64 +51,74 @@ Overview of clustering methods
      - number of clusters
      - Very large ``n_samples``, medium ``n_clusters`` with
        :ref:`MiniBatch code <mini_batch_kmeans>`
-     - General-purpose, even cluster size, flat geometry, not too many clusters
+     - General-purpose, even cluster size, flat geometry,
+       not too many clusters, inductive
      - Distances between points
 
    * - :ref:`Affinity propagation <affinity_propagation>`
      - damping, sample preference
      - Not scalable with n_samples
-     - Many clusters, uneven cluster size, non-flat geometry
+     - Many clusters, uneven cluster size, non-flat geometry, inductive
      - Graph distance (e.g. nearest-neighbor graph)
 
    * - :ref:`Mean-shift <mean_shift>`
      - bandwidth
      - Not scalable with ``n_samples``
-     - Many clusters, uneven cluster size, non-flat geometry
+     - Many clusters, uneven cluster size, non-flat geometry, inductive
      - Distances between points
 
    * - :ref:`Spectral clustering <spectral_clustering>`
      - number of clusters
      - Medium ``n_samples``, small ``n_clusters``
-     - Few clusters, even cluster size, non-flat geometry
+     - Few clusters, even cluster size, non-flat geometry, transductive
      - Graph distance (e.g. nearest-neighbor graph)
 
    * - :ref:`Ward hierarchical clustering <hierarchical_clustering>`
      - number of clusters or distance threshold
      - Large ``n_samples`` and ``n_clusters``
-     - Many clusters, possibly connectivity constraints
+     - Many clusters, possibly connectivity constraints, transductive
      - Distances between points
 
    * - :ref:`Agglomerative clustering <hierarchical_clustering>`
      - number of clusters or distance threshold, linkage type, distance
      - Large ``n_samples`` and ``n_clusters``
      - Many clusters, possibly connectivity constraints, non Euclidean
-       distances
+       distances, transductive
      - Any pairwise distance
 
    * - :ref:`DBSCAN <dbscan>`
      - neighborhood size
      - Very large ``n_samples``, medium ``n_clusters``
-     - Non-flat geometry, uneven cluster sizes
+     - Non-flat geometry, uneven cluster sizes, outlier removal,
+       transductive
      - Distances between nearest points
 
    * - :ref:`OPTICS <optics>`
      - minimum cluster membership
      - Very large ``n_samples``, large ``n_clusters``
-     - Non-flat geometry, uneven cluster sizes, variable cluster density
+     - Non-flat geometry, uneven cluster sizes, variable cluster density,
+       outlier removal, transductive
      - Distances between points
 
    * - :ref:`Gaussian mixtures <mixture>`
      - many
      - Not scalable
-     - Flat geometry, good for density estimation
+     - Flat geometry, good for density estimation, inductive
      - Mahalanobis distances to  centers
 
-   * - :ref:`Birch`
+   * - :ref:`BIRCH <birch>`
      - branching factor, threshold, optional global clusterer.
      - Large ``n_clusters`` and ``n_samples``
-     - Large dataset, outlier removal, data reduction.
+     - Large dataset, outlier removal, data reduction, inductive
      - Euclidean distance between points
 
+   * - :ref:`Bisecting K-Means <bisect_k_means>`
+     - number of clusters
+     - Very large ``n_samples``, medium ``n_clusters``
+     - General-purpose, even cluster size, flat geometry,
+       no empty clusters, inductive, hierarchical
+     - Distances between points
+
 Non-flat geometry clustering is useful when the clusters have a specific
 shape, i.e. a non-flat manifold, and the standard euclidean distance is
 not the right metric. This case arises in the two top rows of the figure
@@ -119,6 +129,10 @@ Gaussian mixture models, useful for clus
 mixture models. KMeans can be seen as a special case of Gaussian mixture
 model with equal covariance per component.
 
+:term:`Transductive <transductive>` clustering methods (in contrast to
+:term:`inductive` clustering methods) are not designed to be applied to new,
+unseen data.
+
 .. _k_means:
 
 K-means
@@ -196,9 +210,13 @@ As a result, the computation is often do
 initializations of the centroids. One method to help address this issue is the
 k-means++ initialization scheme, which has been implemented in scikit-learn
 (use the ``init='k-means++'`` parameter). This initializes the centroids to be
-(generally) distant from each other, leading to provably better results than
+(generally) distant from each other, leading to probably better results than
 random initialization, as shown in the reference.
 
+K-means++ can also be called independently to select seeds for other
+clustering algorithms, see :func:`sklearn.cluster.kmeans_plusplus` for details
+and example usage.
+
 The algorithm supports sample weights, which can be given by a parameter
 ``sample_weight``. This allows to assign more weight to some samples when
 computing cluster centers and values of inertia. For example, assigning a
@@ -418,8 +436,8 @@ given sample.
 
 .. topic:: References:
 
- * `"Mean shift: A robust approach toward feature space analysis."
-   <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.76.8968&rep=rep1&type=pdf>`_
+ * :doi:`"Mean shift: A robust approach toward feature space analysis"
+   <10.1109/34.1000236>`
    D. Comaniciu and P. Meer, *IEEE Transactions on Pattern Analysis and Machine Intelligence* (2002)
 
 
@@ -440,7 +458,7 @@ to be specified in advance. It works wel
 but is not advised for many clusters.
 
 For two clusters, SpectralClustering solves a convex relaxation of the
-`normalised cuts <https://people.eecs.berkeley.edu/~malik/papers/SM-ncut.pdf>`_
+`normalized cuts <https://people.eecs.berkeley.edu/~malik/papers/SM-ncut.pdf>`_
 problem on the similarity graph: cutting the graph in two so that the weight of
 the edges cut is small compared to the weights of the edges inside each
 cluster. This criteria is especially interesting when working on images, where
@@ -481,11 +499,15 @@ computed using a function of a gradient
 
 .. |coin_kmeans| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_001.png
     :target: ../auto_examples/cluster/plot_coin_segmentation.html
-    :scale: 65
+    :scale: 35
 
 .. |coin_discretize| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_002.png
     :target: ../auto_examples/cluster/plot_coin_segmentation.html
-    :scale: 65
+    :scale: 35
+
+.. |coin_cluster_qr| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_003.png
+    :target: ../auto_examples/cluster/plot_coin_segmentation.html
+    :scale: 35
 
 Different label assignment strategies
 -------------------------------------
@@ -497,12 +519,26 @@ In particular, unless you control the ``
 reproducible from run-to-run, as it depends on random initialization.
 The alternative ``"discretize"`` strategy is 100% reproducible, but tends
 to create parcels of fairly even and geometrical shape.
+The recently added ``"cluster_qr"`` option is a deterministic alternative that
+tends to create the visually best partitioning on the example application
+below.
+
+================================  ================================  ================================
+ ``assign_labels="kmeans"``        ``assign_labels="discretize"``    ``assign_labels="cluster_qr"``
+================================  ================================  ================================
+|coin_kmeans|                          |coin_discretize|                  |coin_cluster_qr|
+================================  ================================  ================================
+
+.. topic:: References:
 
-=====================================  =====================================
- ``assign_labels="kmeans"``              ``assign_labels="discretize"``
-=====================================  =====================================
-|coin_kmeans|                          |coin_discretize|
-=====================================  =====================================
+ * `"Multiclass spectral clustering"
+   <https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf>`_
+   Stella X. Yu, Jianbo Shi, 2003
+
+ * :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>`
+   Anil Damle, Victor Minden, Lexing Ying, 2019
+
+.. _spectral_clustering_graph:
 
 Spectral Clustering Graphs
 --------------------------
@@ -518,12 +554,12 @@ graph, and SpectralClustering is initial
 
 .. topic:: References:
 
- * `"A Tutorial on Spectral Clustering"
-   <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323>`_
+ * :doi:`"A Tutorial on Spectral Clustering"
+   <10.1007/s11222-007-9033-z>`
    Ulrike von Luxburg, 2007
 
- * `"Normalized cuts and image segmentation"
-   <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324>`_
+ * :doi:`"Normalized cuts and image segmentation"
+   <10.1109/34.868688>`
    Jianbo Shi, Jitendra Malik, 2000
 
  * `"A Random Walks View of Spectral Segmentation"
@@ -534,9 +570,9 @@ graph, and SpectralClustering is initial
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100>`_
    Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001
 
- * `"Preconditioned Spectral Clustering for Stochastic
+ * :arxiv:`"Preconditioned Spectral Clustering for Stochastic
    Block Partition Streaming Graph Challenge"
-   <https://arxiv.org/abs/1708.07481>`_
+   <1708.07481>`
    David Zhuzhunashvili, Andrew Knyazev
 
 .. _hierarchical_clustering:
@@ -733,6 +769,65 @@ each class.
 
  * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering_metrics.py`
 
+.. _bisect_k_means:
+
+Bisecting K-Means
+-----------------
+
+The :class:`BisectingKMeans` is an iterative variant of :class:`KMeans`, using
+divisive hierarchical clustering. Instead of creating all centroids at once, centroids
+are picked progressively based on a previous clustering: a cluster is split into two
+new clusters repeatedly until the target number of clusters is reached.
+
+:class:`BisectingKMeans` is more efficient than :class:`KMeans` when the number
+of clusters is large since it only works on a subset of the data at each
+bisection while :class:`KMeans` always works on the entire dataset.
+
+Although :class:`BisectingKMeans` can't benefit from the advantages of the `"k-means++"`
+initialization by design, it will still produce comparable results to
+`KMeans(init="k-means++")` in terms of inertia at cheaper computational costs, and will
+likely produce better results than `KMeans` with a random initialization.
+
+This variant is more efficient than agglomerative clustering if the number of clusters is
+small compared to the number of data points.
+
+This variant also does not produce empty clusters.
+
+There exist two strategies for selecting the cluster to split:
+ - ``bisecting_strategy="largest_cluster"`` selects the cluster having the most points
+ - ``bisecting_strategy="biggest_inertia"`` selects the cluster with biggest inertia
+   (cluster with biggest Sum of Squared Errors within)
+
+Picking by largest amount of data points in most cases produces results as
+accurate as picking by inertia and is faster (especially for larger amounts of
+data points, where calculating error may be costly).
+
+Picking by largest amount of data points will also likely produce clusters of similar
+sizes while `KMeans` is known to produce clusters of different sizes.
+
+Difference between Bisecting K-Means and regular K-Means can be seen on example
+:ref:`sphx_glr_auto_examples_cluster_plot_bisect_kmeans.py`.
+While the regular K-Means algorithm tends to create non-related clusters,
+clusters from Bisecting K-Means are well ordered and create quite a visible hierarchy.
+
+.. topic:: References:
+
+ * `"A Comparison of Document Clustering Techniques"
+   <http://www.philippe-fournier-viger.com/spmf/bisectingkmeans.pdf>`_
+   Michael Steinbach, George Karypis and Vipin Kumar,
+   Department of Computer Science and Engineering, University of Minnesota
+   (June 2000)
+ * `"Performance Analysis of K-Means and Bisecting K-Means Algorithms in Weblog Data"
+   <https://ijeter.everscience.org/Manuscripts/Volume-4/Issue-8/Vol-4-issue-8-M-23.pdf>`_
+   K.Abirami and Dr.P.Mayilvahanan,
+   International Journal of Emerging Technologies in Engineering Research (IJETER)
+   Volume 4, Issue 8, (August 2016)
+ * `"Bisecting K-means Algorithm Based on K-valued Self-determining
+   and Clustering Center Optimization"
+   <http://www.jcomputers.us/vol13/jcp1306-01.pdf>`_
+   Jian Di, Xinyue Gou
+   School of Control and Computer Engineering, North China Electric Power University,
+   Baoding, Hebei, China (August 2017)
 
 .. _dbscan:
 
@@ -821,7 +916,7 @@ by black points below.
 
     This implementation is by default not memory efficient because it constructs
     a full pairwise similarity matrix in the case where kd-trees or ball-trees cannot
-    be used (e.g., with sparse matrices). This matrix will consume n^2 floats.
+    be used (e.g., with sparse matrices). This matrix will consume :math:`n^2` floats.
     A couple of mechanisms for getting around this are:
 
     - Use :ref:`OPTICS <optics>` clustering in conjunction with the
@@ -926,8 +1021,8 @@ represented as children of a larger pare
     `HDBSCAN <https://hdbscan.readthedocs.io>`_. The HDBSCAN implementation is
     multithreaded, and has better algorithmic runtime complexity than OPTICS,
     at the cost of worse memory scaling. For extremely large datasets that
-    exhaust system memory using HDBSCAN, OPTICS will maintain *n* (as opposed
-    to *n^2*) memory scaling; however, tuning of the ``max_eps`` parameter
+    exhaust system memory using HDBSCAN, OPTICS will maintain :math:`n` (as opposed
+    to :math:`n^2`) memory scaling; however, tuning of the ``max_eps`` parameter
     will likely need to be used to give a solution in a reasonable amount of
     wall time.
 
@@ -939,7 +1034,7 @@ represented as children of a larger pare
 
 .. _birch:
 
-Birch
+BIRCH
 =====
 
 The :class:`Birch` builds a tree called the Clustering Feature Tree (CFT)
@@ -953,12 +1048,12 @@ The CF Subclusters hold the necessary in
 the need to hold the entire input data in memory. This information includes:
 
 - Number of samples in a subcluster.
-- Linear Sum - A n-dimensional vector holding the sum of all samples
+- Linear Sum - An n-dimensional vector holding the sum of all samples
 - Squared Sum - Sum of the squared L2 norm of all samples.
 - Centroids - To avoid recalculation linear sum / n_samples.
 - Squared norm of the centroids.
 
-The Birch algorithm has two parameters, the threshold and the branching factor.
+The BIRCH algorithm has two parameters, the threshold and the branching factor.
 The branching factor limits the number of subclusters in a node and the
 threshold limits the distance between the entering sample and the existing
 subclusters.
@@ -992,13 +1087,13 @@ clusters (labels) and the samples are ma
   then this node is again split into two and the process is continued
   recursively, till it reaches the root.
 
-**Birch or MiniBatchKMeans?**
+**BIRCH or MiniBatchKMeans?**
 
- - Birch does not scale very well to high dimensional data. As a rule of thumb if
+ - BIRCH does not scale very well to high dimensional data. As a rule of thumb if
    ``n_features`` is greater than twenty, it is generally better to use MiniBatchKMeans.
  - If the number of instances of data needs to be reduced, or if one wants a
    large number of subclusters either as a preprocessing step or otherwise,
-   Birch is more useful than MiniBatchKMeans.
+   BIRCH is more useful than MiniBatchKMeans.
 
 
 **How to use partial_fit?**
@@ -1043,86 +1138,121 @@ classes according to some similarity met
 
 .. currentmodule:: sklearn.metrics
 
+.. _rand_score:
 .. _adjusted_rand_score:
 
-Adjusted Rand index
--------------------
+Rand index
+----------
 
-Given the knowledge of the ground truth class assignments ``labels_true``
-and our clustering algorithm assignments of the same samples
-``labels_pred``, the **adjusted Rand index** is a function that measures
-the **similarity** of the two assignments, ignoring permutations and **with
-chance normalization**::
+Given the knowledge of the ground truth class assignments
+``labels_true`` and our clustering algorithm assignments of the same
+samples ``labels_pred``, the **(adjusted or unadjusted) Rand index**
+is a function that measures the **similarity** of the two assignments,
+ignoring permutations::
 
   >>> from sklearn import metrics
   >>> labels_true = [0, 0, 0, 1, 1, 1]
   >>> labels_pred = [0, 0, 1, 1, 2, 2]
+  >>> metrics.rand_score(labels_true, labels_pred)
+  0.66...
+
+The Rand index does not ensure to obtain a value close to 0.0 for a
+random labelling. The adjusted Rand index **corrects for chance** and
+will give such a baseline.
 
   >>> metrics.adjusted_rand_score(labels_true, labels_pred)
   0.24...
 
-One can permute 0 and 1 in the predicted labels, rename 2 to 3, and get
-the same score::
+As with all clustering metrics, one can permute 0 and 1 in the predicted
+labels, rename 2 to 3, and get the same score::
 
   >>> labels_pred = [1, 1, 0, 0, 3, 3]
+  >>> metrics.rand_score(labels_true, labels_pred)
+  0.66...
   >>> metrics.adjusted_rand_score(labels_true, labels_pred)
   0.24...
 
-Furthermore, :func:`adjusted_rand_score` is **symmetric**: swapping the argument
-does not change the score. It can thus be used as a **consensus
-measure**::
+Furthermore, both :func:`rand_score` and :func:`adjusted_rand_score` are
+**symmetric**: swapping the argument does not change the scores. They can
+thus be used as **consensus measures**::
 
+  >>> metrics.rand_score(labels_pred, labels_true)
+  0.66...
   >>> metrics.adjusted_rand_score(labels_pred, labels_true)
   0.24...
 
 Perfect labeling is scored 1.0::
 
   >>> labels_pred = labels_true[:]
+  >>> metrics.rand_score(labels_true, labels_pred)
+  1.0
   >>> metrics.adjusted_rand_score(labels_true, labels_pred)
   1.0
 
-Bad (e.g. independent labelings) have negative or close to 0.0 scores::
-
-  >>> labels_true = [0, 1, 2, 0, 3, 4, 5, 1]
-  >>> labels_pred = [1, 1, 0, 0, 2, 2, 2, 2]
+Poorly agreeing labels (e.g. independent labelings) have lower scores,
+and for the adjusted Rand index the score will be negative or close to
+zero. However, for the unadjusted Rand index the score, while lower,
+will not necessarily be close to zero::
+
+  >>> labels_true = [0, 0, 0, 0, 0, 0, 1, 1]
+  >>> labels_pred = [0, 1, 2, 3, 4, 5, 5, 6]
+  >>> metrics.rand_score(labels_true, labels_pred)
+  0.39...
   >>> metrics.adjusted_rand_score(labels_true, labels_pred)
-  -0.12...
+  -0.07...
 
 
 Advantages
 ~~~~~~~~~~
 
-- **Random (uniform) label assignments have a ARI score close to 0.0**
-  for any value of ``n_clusters`` and ``n_samples`` (which is not the
-  case for raw Rand index or the V-measure for instance).
-
-- **Bounded range [-1, 1]**: negative values are bad (independent
-  labelings), similar clusterings have a positive ARI, 1.0 is the perfect
-  match score.
-
-- **No assumption is made on the cluster structure**: can be used
-  to compare clustering algorithms such as k-means which assumes isotropic
-  blob shapes with results of spectral clustering algorithms which can
-  find cluster with "folded" shapes.
+- **Interpretability**: The unadjusted Rand index is proportional
+  to the number of sample pairs whose labels are the same in both
+  `labels_pred` and `labels_true`, or are different in both.
+
+- **Random (uniform) label assignments have an adjusted Rand index
+  score close to 0.0** for any value of ``n_clusters`` and
+  ``n_samples`` (which is not the case for the unadjusted Rand index
+  or the V-measure for instance).
+
+- **Bounded range**: Lower values indicate different labelings,
+  similar clusterings have a high (adjusted or unadjusted) Rand index,
+  1.0 is the perfect match score. The score range is [0, 1] for the
+  unadjusted Rand index and [-1, 1] for the adjusted Rand index.
+
+- **No assumption is made on the cluster structure**: The (adjusted or
+  unadjusted) Rand index can be used to compare all kinds of
+  clustering algorithms, and can be used to compare clustering
+  algorithms such as k-means which assumes isotropic blob shapes with
+  results of spectral clustering algorithms which can find cluster
+  with "folded" shapes.
 
 
 Drawbacks
 ~~~~~~~~~
 
-- Contrary to inertia, **ARI requires knowledge of the ground truth
-  classes** while is almost never available in practice or requires manual
-  assignment by human annotators (as in the supervised learning setting).
-
-  However ARI can also be useful in a purely unsupervised setting as a
-  building block for a Consensus Index that can be used for clustering
-  model selection (TODO).
-
+- Contrary to inertia, the **(adjusted or unadjusted) Rand index
+  requires knowledge of the ground truth classes** which is almost
+  never available in practice or requires manual assignment by human
+  annotators (as in the supervised learning setting).
+
+  However (adjusted or unadjusted) Rand index can also be useful in a
+  purely unsupervised setting as a building block for a Consensus
+  Index that can be used for clustering model selection (TODO).
+
+- The **unadjusted Rand index is often close to 1.0** even if the
+  clusterings themselves differ significantly. This can be understood
+  when interpreting the Rand index as the accuracy of element pair
+  labeling resulting from the clusterings: In practice there often is
+  a majority of element pairs that are assigned the ``different`` pair
+  label under both the predicted and the ground truth clustering
+  resulting in a high proportion of pair labels that agree, which
+  leads subsequently to a high score.
 
 .. topic:: Examples:
 
- * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis of
-   the impact of the dataset size on the value of clustering measures
-   for random assignments.
+ * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`:
+   Analysis of the impact of the dataset size on the value of
+   clustering measures for random assignments.
 
 
 Mathematical formulation
@@ -1137,14 +1267,16 @@ define :math:`a` and :math:`b` as:
 - :math:`b`, the number of pairs of elements that are in different sets
   in C and in different sets in K
 
-The raw (unadjusted) Rand index is then given by:
+The unadjusted Rand index is then given by:
 
 .. math:: \text{RI} = \frac{a + b}{C_2^{n_{samples}}}
 
-Where :math:`C_2^{n_{samples}}` is the total number of possible pairs
-in the dataset (without ordering).
+where :math:`C_2^{n_{samples}}` is the total number of possible pairs
+in the dataset. It does not matter if the calculation is performed on
+ordered pairs or unordered pairs as long as the calculation is
+performed consistently.
 
-However the RI score does not guarantee that random label assignments
+However, the Rand index does not guarantee that random label assignments
 will get a value close to zero (esp. if the number of clusters is in
 the same order of magnitude as the number of samples).
 
@@ -1159,9 +1291,17 @@ random labelings by defining the adjuste
    <https://link.springer.com/article/10.1007%2FBF01908075>`_
    L. Hubert and P. Arabie, Journal of Classification 1985
 
+ * `Properties of the Hubert-Arabie adjusted Rand index
+   <https://psycnet.apa.org/record/2004-17801-007>`_
+   D. Steinley, Psychological Methods 2004
+
+ * `Wikipedia entry for the Rand index
+   <https://en.wikipedia.org/wiki/Rand_index>`_
+
  * `Wikipedia entry for the adjusted Rand index
    <https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index>`_
 
+
 .. _mutual_info_score:
 
 Mutual Information based scores
@@ -1328,7 +1468,7 @@ more broadly common names.
 
  * `Wikipedia entry for the Adjusted Mutual Information
    <https://en.wikipedia.org/wiki/Adjusted_Mutual_Information>`_
-   
+
  .. [VEB2009] Vinh, Epps, and Bailey, (2009). "Information theoretic measures
    for clusterings comparison". Proceedings of the 26th Annual International
    Conference on Machine Learning - ICML '09.
@@ -1339,13 +1479,13 @@ more broadly common names.
    Clusterings Comparison: Variants, Properties, Normalization and
    Correction for Chance". JMLR
    <http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf>
-   
+
  .. [YAT2016] Yang, Algesheimer, and Tessone, (2016). "A comparative analysis of
    community
    detection algorithms on artificial networks". Scientific Reports 6: 30750.
    `doi:10.1038/srep30750 <https://www.nature.com/articles/srep30750>`_.
-   
-   
+
+
 
 .. _homogeneity_completeness:
 
@@ -1598,7 +1738,7 @@ Drawbacks
 
   * E. B. Fowkles and C. L. Mallows, 1983. "A method for comparing two
     hierarchical clusterings". Journal of the American Statistical Association.
-    http://wildfire.stat.ucla.edu/pdflibrary/fowlkes.pdf
+    https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008
 
   * `Wikipedia entry for the Fowlkes-Mallows Index
     <https://en.wikipedia.org/wiki/Fowlkes-Mallows_index>`_
@@ -1647,10 +1787,9 @@ cluster analysis.
 
 .. topic:: References
 
- * Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
-   Interpretation and Validation of Cluster Analysis". Computational
-   and Applied Mathematics 20: 53–65.
-   `doi:10.1016/0377-0427(87)90125-7 <https://doi.org/10.1016/0377-0427(87)90125-7>`_.
+ * Peter J. Rousseeuw (1987). :doi:`"Silhouettes: a Graphical Aid to the
+   Interpretation and Validation of Cluster Analysis"<10.1016/0377-0427(87)90125-7>`
+   . Computational and Applied Mathematics 20: 53–65.
 
 
 Advantages
@@ -1683,12 +1822,12 @@ Calinski-Harabasz Index
 
 
 If the ground truth labels are not known, the Calinski-Harabasz index
-(:func:`sklearn.metrics.calinski_harabasz_score`) - also known as the Variance 
-Ratio Criterion - can be used to evaluate the model, where a higher 
+(:func:`sklearn.metrics.calinski_harabasz_score`) - also known as the Variance
+Ratio Criterion - can be used to evaluate the model, where a higher
 Calinski-Harabasz score relates to a model with better defined clusters.
 
 The index is the ratio of the sum of between-clusters dispersion and of
-inter-cluster dispersion for all clusters (where dispersion is defined as the
+within-cluster dispersion for all clusters (where dispersion is defined as the
 sum of distances squared):
 
   >>> from sklearn import metrics
@@ -1749,8 +1888,7 @@ number of points in cluster :math:`q`.
  * Caliński, T., & Harabasz, J. (1974).
    `"A Dendrite Method for Cluster Analysis"
    <https://www.researchgate.net/publication/233096619_A_Dendrite_Method_for_Cluster_Analysis>`_.
-   Communications in Statistics-theory and Methods 3: 1-27.
-   `doi:10.1080/03610927408827101 <https://doi.org/10.1080/03610927408827101>`_.
+   :doi:`Communications in Statistics-theory and Methods 3: 1-27 <10.1080/03610927408827101>`.
 
 
 .. _davies-bouldin_index:
@@ -1788,7 +1926,8 @@ Advantages
 ~~~~~~~~~~
 
 - The computation of Davies-Bouldin is simpler than that of Silhouette scores.
-- The index is computed only quantities and features inherent to the dataset.
+- The index is solely based on quantities and features inherent to the dataset
+  as its computation only uses point-wise distances.
 
 Drawbacks
 ~~~~~~~~~
@@ -1809,7 +1948,7 @@ this index, similarity is defined as a m
   the centroid of that cluster -- also know as cluster diameter.
 - :math:`d_{ij}`, the distance between cluster centroids :math:`i` and :math:`j`.
 
-A simple choice to construct :math:`R_ij` so that it is nonnegative and
+A simple choice to construct :math:`R_{ij}` so that it is nonnegative and
 symmetric is:
 
 .. math::
@@ -1824,15 +1963,13 @@ Then the Davies-Bouldin index is defined
 .. topic:: References
 
  * Davies, David L.; Bouldin, Donald W. (1979).
-   "A Cluster Separation Measure"
+   :doi:`"A Cluster Separation Measure" <10.1109/TPAMI.1979.4766909>`
    IEEE Transactions on Pattern Analysis and Machine Intelligence.
    PAMI-1 (2): 224-227.
-   `doi:10.1109/TPAMI.1979.4766909 <https://doi.org/10.1109/TPAMI.1979.4766909>`_.
 
  * Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001).
-   "On Clustering Validation Techniques"
+   :doi:`"On Clustering Validation Techniques" <10.1023/A:1012801612483>`
    Journal of Intelligent Information Systems, 17(2-3), 107-145.
-   `doi:10.1023/A:1012801612483 <https://doi.org/10.1023/A:1012801612483>`_.
 
  * `Wikipedia entry for Davies-Bouldin index
    <https://en.wikipedia.org/wiki/Davies–Bouldin_index>`_.
@@ -1893,3 +2030,85 @@ Drawbacks
 
  * `Wikipedia entry for contingency matrix
    <https://en.wikipedia.org/wiki/Contingency_table>`_
+
+.. _pair_confusion_matrix:
+
+Pair Confusion Matrix
+---------------------
+
+The pair confusion matrix
+(:func:`sklearn.metrics.cluster.pair_confusion_matrix`) is a 2x2
+similarity matrix
+
+.. math::
+   C = \left[\begin{matrix}
+   C_{00} & C_{01} \\
+   C_{10} & C_{11}
+   \end{matrix}\right]
+
+between two clusterings computed by considering all pairs of samples and
+counting pairs that are assigned into the same or into different clusters
+under the true and predicted clusterings.
+
+It has the following entries:
+
+  :math:`C_{00}` : number of pairs with both clusterings having the samples
+  not clustered together
+
+  :math:`C_{10}` : number of pairs with the true label clustering having the
+  samples clustered together but the other clustering not having the samples
+  clustered together
+
+  :math:`C_{01}` : number of pairs with the true label clustering not having
+  the samples clustered together but the other clustering having the samples
+  clustered together
+
+  :math:`C_{11}` : number of pairs with both clusterings having the samples
+  clustered together
+
+Considering a pair of samples that is clustered together a positive pair,
+then as in binary classification the count of true negatives is
+:math:`C_{00}`, false negatives is :math:`C_{10}`, true positives is
+:math:`C_{11}` and false positives is :math:`C_{01}`.
+
+Perfectly matching labelings have all non-zero entries on the
+diagonal regardless of actual label values::
+
+   >>> from sklearn.metrics.cluster import pair_confusion_matrix
+   >>> pair_confusion_matrix([0, 0, 1, 1], [0, 0, 1, 1])
+   array([[8, 0],
+          [0, 4]])
+
+::
+
+   >>> pair_confusion_matrix([0, 0, 1, 1], [1, 1, 0, 0])
+   array([[8, 0],
+          [0, 4]])
+
+Labelings that assign all class members to the same clusters
+are complete but may not always be pure, hence penalized, and
+have some off-diagonal non-zero entries::
+
+   >>> pair_confusion_matrix([0, 0, 1, 2], [0, 0, 1, 1])
+   array([[8, 2],
+          [0, 2]])
+
+The matrix is not symmetric::
+
+   >>> pair_confusion_matrix([0, 0, 1, 1], [0, 0, 1, 2])
+   array([[8, 0],
+          [2, 2]])
+
+If class members are completely split across different clusters, the
+assignment is totally incomplete, hence the matrix has all zero
+diagonal entries::
+
+   >>> pair_confusion_matrix([0, 0, 0, 0], [0, 1, 2, 3])
+   array([[ 0,  0],
+          [12,  0]])
+
+.. topic:: References
+
+ * `L. Hubert and P. Arabie, Comparing Partitions, Journal of
+   Classification 1985
+   <https://link.springer.com/article/10.1007%2FBF01908075>`_
diff -pruN 0.23.2-5/doc/modules/compose.rst 1.1.1-1/doc/modules/compose.rst
--- 0.23.2-5/doc/modules/compose.rst	2020-08-04 12:12:58.872675400 +0000
+++ 1.1.1-1/doc/modules/compose.rst	2022-05-19 12:16:26.448782400 +0000
@@ -139,6 +139,27 @@ or by name::
     >>> pipe['reduce_dim']
     PCA()
 
+To enable model inspection, :class:`~sklearn.pipeline.Pipeline` has a
+``get_feature_names_out()`` method, just like all transformers. You can use
+pipeline slicing to get the feature names going into each step::
+
+    >>> from sklearn.datasets import load_iris
+    >>> from sklearn.feature_selection import SelectKBest
+    >>> iris = load_iris()
+    >>> pipe = Pipeline(steps=[
+    ...    ('select', SelectKBest(k=2)),
+    ...    ('clf', LogisticRegression())])
+    >>> pipe.fit(iris.data, iris.target)
+    Pipeline(steps=[('select', SelectKBest(...)), ('clf', LogisticRegression(...))])
+    >>> pipe[:-1].get_feature_names_out()
+    array(['x2', 'x3'], ...)
+
+You can also provide custom feature names for the input data using
+``get_feature_names_out``::
+
+    >>> pipe[:-1].get_feature_names_out(iris.feature_names)
+    array(['petal length (cm)', 'petal width (cm)'], ...)
+
 .. topic:: Examples:
 
  * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py`
@@ -147,8 +168,9 @@ or by name::
  * :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_approximation.py`
  * :ref:`sphx_glr_auto_examples_svm_plot_svm_anova.py`
  * :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py`
+ * :ref:`sphx_glr_auto_examples_miscellaneous_plot_pipeline_display.py`
 
-.. topic:: See also:
+.. topic:: See Also:
 
  * :ref:`composite_grid_search`
 
@@ -251,12 +273,13 @@ the regressor that will be used for pred
 be applied to the target variable::
 
   >>> import numpy as np
-  >>> from sklearn.datasets import load_boston
+  >>> from sklearn.datasets import fetch_california_housing
   >>> from sklearn.compose import TransformedTargetRegressor
   >>> from sklearn.preprocessing import QuantileTransformer
   >>> from sklearn.linear_model import LinearRegression
   >>> from sklearn.model_selection import train_test_split
-  >>> X, y = load_boston(return_X_y=True)
+  >>> X, y = fetch_california_housing(return_X_y=True)
+  >>> X, y = X[:2000, :], y[:2000]  # select a subset of data
   >>> transformer = QuantileTransformer(output_distribution='normal')
   >>> regressor = LinearRegression()
   >>> regr = TransformedTargetRegressor(regressor=regressor,
@@ -265,10 +288,10 @@ be applied to the target variable::
   >>> regr.fit(X_train, y_train)
   TransformedTargetRegressor(...)
   >>> print('R2 score: {0:.2f}'.format(regr.score(X_test, y_test)))
-  R2 score: 0.67
+  R2 score: 0.61
   >>> raw_target_regr = LinearRegression().fit(X_train, y_train)
   >>> print('R2 score: {0:.2f}'.format(raw_target_regr.score(X_test, y_test)))
-  R2 score: 0.64
+  R2 score: 0.59
 
 For simple transformations, instead of a Transformer object, a pair of
 functions can be passed, defining the transformation and its inverse mapping::
@@ -286,7 +309,7 @@ Subsequently, the object is created as::
   >>> regr.fit(X_train, y_train)
   TransformedTargetRegressor(...)
   >>> print('R2 score: {0:.2f}'.format(regr.score(X_test, y_test)))
-  R2 score: 0.65
+  R2 score: 0.51
 
 By default, the provided functions are checked at each fit to be the inverse of
 each other. However, it is possible to bypass this checking by setting
@@ -301,7 +324,7 @@ each other. However, it is possible to b
   >>> regr.fit(X_train, y_train)
   TransformedTargetRegressor(...)
   >>> print('R2 score: {0:.2f}'.format(regr.score(X_test, y_test)))
-  R2 score: -4.50
+  R2 score: -1.57
 
 .. note::
 
@@ -329,7 +352,7 @@ and the feature matrices they output are
 larger matrix.
 
 When you want to apply different transformations to each field of the data,
-see the related class :class:`sklearn.compose.ColumnTransformer`
+see the related class :class:`~sklearn.compose.ColumnTransformer`
 (see :ref:`user guide <column_transformer>`).
 
 :class:`FeatureUnion` serves the same purposes as :class:`Pipeline` -
@@ -340,7 +363,7 @@ create complex models.
 
 (A :class:`FeatureUnion` has no way of checking whether two transformers
 might produce identical features. It only produces a union when the
-feature sets are disjoint, and making sure they are the caller's
+feature sets are disjoint, and making sure they are is the caller's
 responsibility.)
 
 
@@ -415,10 +438,8 @@ preprocessing or a specific feature extr
   ...      'user_rating': [4, 5, 4, 3]})
 
 For this data, we might want to encode the ``'city'`` column as a categorical
-variable using :class:`preprocessing.OneHotEncoder
-<sklearn.preprocessing.OneHotEncoder>` but apply a
-:class:`feature_extraction.text.CountVectorizer
-<sklearn.feature_extraction.text.CountVectorizer>` to the ``'title'`` column.
+variable using :class:`~sklearn.preprocessing.OneHotEncoder` but apply a
+:class:`~sklearn.feature_extraction.text.CountVectorizer` to the ``'title'`` column.
 As we might use multiple feature extraction methods on the same column, we give
 each transformer a unique name, say ``'city_category'`` and ``'title_bow'``.
 By default, the remaining rating columns are ignored (``remainder='drop'``)::
@@ -427,21 +448,20 @@ By default, the remaining rating columns
   >>> from sklearn.feature_extraction.text import CountVectorizer
   >>> from sklearn.preprocessing import OneHotEncoder
   >>> column_trans = ColumnTransformer(
-  ...     [('city_category', OneHotEncoder(dtype='int'),['city']),
+  ...     [('categories', OneHotEncoder(dtype='int'), ['city']),
   ...      ('title_bow', CountVectorizer(), 'title')],
-  ...     remainder='drop')
+  ...     remainder='drop', verbose_feature_names_out=False)
 
   >>> column_trans.fit(X)
-  ColumnTransformer(transformers=[('city_category', OneHotEncoder(dtype='int'),
+  ColumnTransformer(transformers=[('categories', OneHotEncoder(dtype='int'),
                                    ['city']),
-                                  ('title_bow', CountVectorizer(), 'title')])
+                                  ('title_bow', CountVectorizer(), 'title')],
+                    verbose_feature_names_out=False)
 
-  >>> column_trans.get_feature_names()
-  ['city_category__x0_London', 'city_category__x0_Paris', 'city_category__x0_Sallisaw',
-  'title_bow__bow', 'title_bow__feast', 'title_bow__grapes', 'title_bow__his',
-  'title_bow__how', 'title_bow__last', 'title_bow__learned', 'title_bow__moveable',
-  'title_bow__of', 'title_bow__the', 'title_bow__trick', 'title_bow__watson',
-  'title_bow__wrath']
+  >>> column_trans.get_feature_names_out()
+  array(['city_London', 'city_Paris', 'city_Sallisaw', 'bow', 'feast',
+  'grapes', 'his', 'how', 'last', 'learned', 'moveable', 'of', 'the',
+   'trick', 'watson', 'wrath'], ...)
 
   >>> column_trans.transform(X).toarray()
   array([[1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
@@ -452,13 +472,13 @@ By default, the remaining rating columns
 In the above example, the
 :class:`~sklearn.feature_extraction.text.CountVectorizer` expects a 1D array as
 input and therefore the columns were specified as a string (``'title'``).
-However, :class:`preprocessing.OneHotEncoder <sklearn.preprocessing.OneHotEncoder>`
+However, :class:`~sklearn.preprocessing.OneHotEncoder`
 as most of other transformers expects 2D data, therefore in that case you need
 to specify the column as a list of strings (``['city']``).
 
 Apart from a scalar or a single item list, the column selection can be specified
 as a list of multiple items, an integer array, a slice, a boolean mask, or
-with a :func:`~sklearn.compose.make_column_selector`. The 
+with a :func:`~sklearn.compose.make_column_selector`. The
 :func:`~sklearn.compose.make_column_selector` is used to select columns based
 on data type or column name::
 
@@ -528,23 +548,41 @@ above example would be::
                                   ('countvectorizer', CountVectorizer(),
                                    'title')])
 
+If :class:`~sklearn.compose.ColumnTransformer` is fitted with a dataframe
+and the dataframe only has string column names, then transforming a dataframe
+will use the column names to select the columns::
+
+
+  >>> ct = ColumnTransformer(
+  ...          [("scale", StandardScaler(), ["expert_rating"])]).fit(X)
+  >>> X_new = pd.DataFrame({"expert_rating": [5, 6, 1],
+  ...                       "ignored_new_col": [1.2, 0.3, -0.1]})
+  >>> ct.transform(X_new)
+  array([[ 0.9...],
+         [ 2.1...],
+         [-3.9...]])
+
 .. _visualizing_composite_estimators:
 
 Visualizing Composite Estimators
 ================================
 
-Estimators can be displayed with a HTML representation when shown in a
-jupyter notebook. This can be useful to diagnose or visualize a Pipeline with
-many estimators. This visualization is activated by setting the
-`display` option in :func:`sklearn.set_config`::
+Estimators are displayed with an HTML representation when shown in a
+jupyter notebook. This is useful to diagnose or visualize a Pipeline with
+many estimators. This visualization is activated by default::
+
+  >>> column_trans  # doctest: +SKIP
+
+It can be deactivated by setting the `display` option in :func:`~sklearn.set_config`
+to 'text'::
 
   >>> from sklearn import set_config
-  >>> set_config(display='diagram')   # doctest: +SKIP
-  >>> # diplays HTML representation in a jupyter context
+  >>> set_config(display='text')  # doctest: +SKIP
+  >>> # displays text representation in a jupyter context
   >>> column_trans  # doctest: +SKIP
 
-An example of the HTML output can be seen in the 
-**HTML representation of Pipeline** section of 
+An example of the HTML output can be seen in the
+**HTML representation of Pipeline** section of
 :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`.
 As an alternative, the HTML can be written to a file using
 :func:`~sklearn.utils.estimator_html_repr`::
diff -pruN 0.23.2-5/doc/modules/computing.rst 1.1.1-1/doc/modules/computing.rst
--- 0.23.2-5/doc/modules/computing.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/computing.rst	1970-01-01 00:00:00.000000000 +0000
@@ -1,711 +0,0 @@
-============================
-Computing with scikit-learn
-============================
-
-.. _scaling_strategies:
-
-Strategies to scale computationally: bigger data
-=================================================
-
-For some applications the amount of examples, features (or both) and/or the
-speed at which they need to be processed are challenging for traditional
-approaches. In these cases scikit-learn has a number of options you can
-consider to make your system scale.
-
-Scaling with instances using out-of-core learning
---------------------------------------------------
-
-Out-of-core (or "external memory") learning is a technique used to learn from
-data that cannot fit in a computer's main memory (RAM).
-
-Here is a sketch of a system designed to achieve this goal:
-
-  1. a way to stream instances
-  2. a way to extract features from instances
-  3. an incremental algorithm
-
-Streaming instances
-....................
-
-Basically, 1. may be a reader that yields instances from files on a
-hard drive, a database, from a network stream etc. However,
-details on how to achieve this are beyond the scope of this documentation.
-
-Extracting features
-...................
-
-\2. could be any relevant way to extract features among the
-different :ref:`feature extraction <feature_extraction>` methods supported by
-scikit-learn. However, when working with data that needs vectorization and
-where the set of features or values is not known in advance one should take
-explicit care. A good example is text classification where unknown terms are
-likely to be found during training. It is possible to use a stateful
-vectorizer if making multiple passes over the data is reasonable from an
-application point of view. Otherwise, one can turn up the difficulty by using
-a stateless feature extractor. Currently the preferred way to do this is to
-use the so-called :ref:`hashing trick<feature_hashing>` as implemented by
-:class:`sklearn.feature_extraction.FeatureHasher` for datasets with categorical
-variables represented as list of Python dicts or
-:class:`sklearn.feature_extraction.text.HashingVectorizer` for text documents.
-
-Incremental learning
-.....................
-
-Finally, for 3. we have a number of options inside scikit-learn. Although not
-all algorithms can learn incrementally (i.e. without seeing all the instances
-at once), all estimators implementing the ``partial_fit`` API are candidates.
-Actually, the ability to learn incrementally from a mini-batch of instances
-(sometimes called "online learning") is key to out-of-core learning as it
-guarantees that at any given time there will be only a small amount of
-instances in the main memory. Choosing a good size for the mini-batch that
-balances relevancy and memory footprint could involve some tuning [1]_.
-
-Here is a list of incremental estimators for different tasks:
-
-  - Classification
-      + :class:`sklearn.naive_bayes.MultinomialNB`
-      + :class:`sklearn.naive_bayes.BernoulliNB`
-      + :class:`sklearn.linear_model.Perceptron`
-      + :class:`sklearn.linear_model.SGDClassifier`
-      + :class:`sklearn.linear_model.PassiveAggressiveClassifier`
-      + :class:`sklearn.neural_network.MLPClassifier`
-  - Regression
-      + :class:`sklearn.linear_model.SGDRegressor`
-      + :class:`sklearn.linear_model.PassiveAggressiveRegressor`
-      + :class:`sklearn.neural_network.MLPRegressor`
-  - Clustering
-      + :class:`sklearn.cluster.MiniBatchKMeans`
-      + :class:`sklearn.cluster.Birch`
-  - Decomposition / feature Extraction
-      + :class:`sklearn.decomposition.MiniBatchDictionaryLearning`
-      + :class:`sklearn.decomposition.IncrementalPCA`
-      + :class:`sklearn.decomposition.LatentDirichletAllocation`
-  - Preprocessing
-      + :class:`sklearn.preprocessing.StandardScaler`
-      + :class:`sklearn.preprocessing.MinMaxScaler`
-      + :class:`sklearn.preprocessing.MaxAbsScaler`
-
-For classification, a somewhat important thing to note is that although a
-stateless feature extraction routine may be able to cope with new/unseen
-attributes, the incremental learner itself may be unable to cope with
-new/unseen targets classes. In this case you have to pass all the possible
-classes to the first ``partial_fit`` call using the ``classes=`` parameter.
-
-Another aspect to consider when choosing a proper algorithm is that not all of
-them put the same importance on each example over time. Namely, the
-``Perceptron`` is still sensitive to badly labeled examples even after many
-examples whereas the ``SGD*`` and ``PassiveAggressive*`` families are more
-robust to this kind of artifacts. Conversely, the latter also tend to give less
-importance to remarkably different, yet properly labeled examples when they
-come late in the stream as their learning rate decreases over time.
-
-Examples
-..........
-
-Finally, we have a full-fledged example of
-:ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. It is aimed at
-providing a starting point for people wanting to build out-of-core learning
-systems and demonstrates most of the notions discussed above.
-
-Furthermore, it also shows the evolution of the performance of different
-algorithms with the number of processed examples.
-
-.. |accuracy_over_time| image::  ../auto_examples/applications/images/sphx_glr_plot_out_of_core_classification_001.png
-    :target: ../auto_examples/applications/plot_out_of_core_classification.html
-    :scale: 80
-
-.. centered:: |accuracy_over_time|
-
-Now looking at the computation time of the different parts, we see that the
-vectorization is much more expensive than learning itself. From the different
-algorithms, ``MultinomialNB`` is the most expensive, but its overhead can be
-mitigated by increasing the size of the mini-batches (exercise: change
-``minibatch_size`` to 100 and 10000 in the program and compare).
-
-.. |computation_time| image::  ../auto_examples/applications/images/sphx_glr_plot_out_of_core_classification_003.png
-    :target: ../auto_examples/applications/plot_out_of_core_classification.html
-    :scale: 80
-
-.. centered:: |computation_time|
-
-
-Notes
-......
-
-.. [1] Depending on the algorithm the mini-batch size can influence results or
-       not. SGD*, PassiveAggressive*, and discrete NaiveBayes are truly online
-       and are not affected by batch size. Conversely, MiniBatchKMeans
-       convergence rate is affected by the batch size. Also, its memory
-       footprint can vary dramatically with batch size.
-
-.. _computational_performance:
-
-Computational Performance
-=========================
-
-For some applications the performance (mainly latency and throughput at
-prediction time) of estimators is crucial. It may also be of interest to
-consider the training throughput but this is often less important in a
-production setup (where it often takes place offline).
-
-We will review here the orders of magnitude you can expect from a number of
-scikit-learn estimators in different contexts and provide some tips and
-tricks for overcoming performance bottlenecks.
-
-Prediction latency is measured as the elapsed time necessary to make a
-prediction (e.g. in micro-seconds). Latency is often viewed as a distribution
-and operations engineers often focus on the latency at a given percentile of
-this distribution (e.g. the 90 percentile).
-
-Prediction throughput is defined as the number of predictions the software can
-deliver in a given amount of time (e.g. in predictions per second).
-
-An important aspect of performance optimization is also that it can hurt
-prediction accuracy. Indeed, simpler models (e.g. linear instead of
-non-linear, or with fewer parameters) often run faster but are not always able
-to take into account the same exact properties of the data as more complex ones.
-
-Prediction Latency
-------------------
-
-One of the most straight-forward concerns one may have when using/choosing a
-machine learning toolkit is the latency at which predictions can be made in a
-production environment.
-
-The main factors that influence the prediction latency are
-  1. Number of features
-  2. Input data representation and sparsity
-  3. Model complexity
-  4. Feature extraction
-
-A last major parameter is also the possibility to do predictions in bulk or
-one-at-a-time mode.
-
-Bulk versus Atomic mode
-........................
-
-In general doing predictions in bulk (many instances at the same time) is
-more efficient for a number of reasons (branching predictability, CPU cache,
-linear algebra libraries optimizations etc.). Here we see on a setting
-with few features that independently of estimator choice the bulk mode is
-always faster, and for some of them by 1 to 2 orders of magnitude:
-
-.. |atomic_prediction_latency| image::  ../auto_examples/applications/images/sphx_glr_plot_prediction_latency_001.png
-    :target: ../auto_examples/applications/plot_prediction_latency.html
-    :scale: 80
-
-.. centered:: |atomic_prediction_latency|
-
-.. |bulk_prediction_latency| image::  ../auto_examples/applications/images/sphx_glr_plot_prediction_latency_002.png
-    :target: ../auto_examples/applications/plot_prediction_latency.html
-    :scale: 80
-
-.. centered:: |bulk_prediction_latency|
-
-To benchmark different estimators for your case you can simply change the
-``n_features`` parameter in this example:
-:ref:`sphx_glr_auto_examples_applications_plot_prediction_latency.py`. This should give
-you an estimate of the order of magnitude of the prediction latency.
-
-Configuring Scikit-learn for reduced validation overhead
-.........................................................
-
-Scikit-learn does some validation on data that increases the overhead per
-call to ``predict`` and similar functions. In particular, checking that
-features are finite (not NaN or infinite) involves a full pass over the
-data. If you ensure that your data is acceptable, you may suppress
-checking for finiteness by setting the environment variable
-``SKLEARN_ASSUME_FINITE`` to a non-empty string before importing
-scikit-learn, or configure it in Python with :func:`sklearn.set_config`.
-For more control than these global settings, a :func:`config_context`
-allows you to set this configuration within a specified context::
-
-  >>> import sklearn
-  >>> with sklearn.config_context(assume_finite=True):
-  ...     pass  # do learning/prediction here with reduced validation
-
-Note that this will affect all uses of
-:func:`sklearn.utils.assert_all_finite` within the context.
-
-Influence of the Number of Features
-....................................
-
-Obviously when the number of features increases so does the memory
-consumption of each example. Indeed, for a matrix of :math:`M` instances
-with :math:`N` features, the space complexity is in :math:`O(NM)`.
-From a computing perspective it also means that the number of basic operations
-(e.g., multiplications for vector-matrix products in linear models) increases
-too. Here is a graph of the evolution of the prediction latency with the
-number of features:
-
-.. |influence_of_n_features_on_latency| image::  ../auto_examples/applications/images/sphx_glr_plot_prediction_latency_003.png
-    :target: ../auto_examples/applications/plot_prediction_latency.html
-    :scale: 80
-
-.. centered:: |influence_of_n_features_on_latency|
-
-Overall you can expect the prediction time to increase at least linearly with
-the number of features (non-linear cases can happen depending on the global
-memory footprint and estimator).
-
-Influence of the Input Data Representation
-...........................................
-
-Scipy provides sparse matrix data structures which are optimized for storing
-sparse data. The main feature of sparse formats is that you don't store zeros
-so if your data is sparse then you use much less memory. A non-zero value in
-a sparse (`CSR or CSC <https://docs.scipy.org/doc/scipy/reference/sparse.html>`_)
-representation will only take on average one 32bit integer position + the 64
-bit floating point value + an additional 32bit per row or column in the matrix.
-Using sparse input on a dense (or sparse) linear model can speedup prediction
-by quite a bit as only the non zero valued features impact the dot product
-and thus the model predictions. Hence if you have 100 non zeros in 1e6
-dimensional space, you only need 100 multiply and add operation instead of 1e6.
-
-Calculation over a dense representation, however, may leverage highly optimised
-vector operations and multithreading in BLAS, and tends to result in fewer CPU
-cache misses. So the sparsity should typically be quite high (10% non-zeros
-max, to be checked depending on the hardware) for the sparse input
-representation to be faster than the dense input representation on a machine
-with many CPUs and an optimized BLAS implementation.
-
-Here is sample code to test the sparsity of your input::
-
-    def sparsity_ratio(X):
-        return 1.0 - np.count_nonzero(X) / float(X.shape[0] * X.shape[1])
-    print("input sparsity ratio:", sparsity_ratio(X))
-
-As a rule of thumb you can consider that if the sparsity ratio is greater
-than 90% you can probably benefit from sparse formats. Check Scipy's sparse
-matrix formats `documentation <https://docs.scipy.org/doc/scipy/reference/sparse.html>`_
-for more information on how to build (or convert your data to) sparse matrix
-formats. Most of the time the ``CSR`` and ``CSC`` formats work best.
-
-Influence of the Model Complexity
-..................................
-
-Generally speaking, when model complexity increases, predictive power and
-latency are supposed to increase. Increasing predictive power is usually
-interesting, but for many applications we would better not increase
-prediction latency too much. We will now review this idea for different
-families of supervised models.
-
-For :mod:`sklearn.linear_model` (e.g. Lasso, ElasticNet,
-SGDClassifier/Regressor, Ridge & RidgeClassifier,
-PassiveAggressiveClassifier/Regressor, LinearSVC, LogisticRegression...) the
-decision function that is applied at prediction time is the same (a dot product)
-, so latency should be equivalent.
-
-Here is an example using
-:class:`sklearn.linear_model.SGDClassifier` with the
-``elasticnet`` penalty. The regularization strength is globally controlled by
-the ``alpha`` parameter. With a sufficiently high ``alpha``,
-one can then increase the ``l1_ratio`` parameter of ``elasticnet`` to
-enforce various levels of sparsity in the model coefficients. Higher sparsity
-here is interpreted as less model complexity as we need fewer coefficients to
-describe it fully. Of course sparsity influences in turn the prediction time
-as the sparse dot-product takes time roughly proportional to the number of
-non-zero coefficients.
-
-.. |en_model_complexity| image::  ../auto_examples/applications/images/sphx_glr_plot_model_complexity_influence_001.png
-    :target: ../auto_examples/applications/plot_model_complexity_influence.html
-    :scale: 80
-
-.. centered:: |en_model_complexity|
-
-For the :mod:`sklearn.svm` family of algorithms with a non-linear kernel,
-the latency is tied to the number of support vectors (the fewer the faster).
-Latency and throughput should (asymptotically) grow linearly with the number
-of support vectors in a SVC or SVR model. The kernel will also influence the
-latency as it is used to compute the projection of the input vector once per
-support vector. In the following graph the ``nu`` parameter of
-:class:`sklearn.svm.NuSVR` was used to influence the number of
-support vectors.
-
-.. |nusvr_model_complexity| image::  ../auto_examples/applications/images/sphx_glr_plot_model_complexity_influence_002.png
-    :target: ../auto_examples/applications/plot_model_complexity_influence.html
-    :scale: 80
-
-.. centered:: |nusvr_model_complexity|
-
-For :mod:`sklearn.ensemble` of trees (e.g. RandomForest, GBT,
-ExtraTrees etc) the number of trees and their depth play the most
-important role. Latency and throughput should scale linearly with the number
-of trees. In this case we used directly the ``n_estimators`` parameter of
-:class:`sklearn.ensemble.gradient_boosting.GradientBoostingRegressor`.
-
-.. |gbt_model_complexity| image::  ../auto_examples/applications/images/sphx_glr_plot_model_complexity_influence_003.png
-    :target: ../auto_examples/applications/plot_model_complexity_influence.html
-    :scale: 80
-
-.. centered:: |gbt_model_complexity|
-
-In any case be warned that decreasing model complexity can hurt accuracy as
-mentioned above. For instance a non-linearly separable problem can be handled
-with a speedy linear model but prediction power will very likely suffer in
-the process.
-
-Feature Extraction Latency
-..........................
-
-Most scikit-learn models are usually pretty fast as they are implemented
-either with compiled Cython extensions or optimized computing libraries.
-On the other hand, in many real world applications the feature extraction
-process (i.e. turning raw data like database rows or network packets into
-numpy arrays) governs the overall prediction time. For example on the Reuters
-text classification task the whole preparation (reading and parsing SGML
-files, tokenizing the text and hashing it into a common vector space) is
-taking 100 to 500 times more time than the actual prediction code, depending on
-the chosen model.
-
- .. |prediction_time| image::  ../auto_examples/applications/images/sphx_glr_plot_out_of_core_classification_004.png
-    :target: ../auto_examples/applications/plot_out_of_core_classification.html
-    :scale: 80
-
-.. centered:: |prediction_time|
-
-In many cases it is thus recommended to carefully time and profile your
-feature extraction code as it may be a good place to start optimizing when
-your overall latency is too slow for your application.
-
-Prediction Throughput
-----------------------
-
-Another important metric to care about when sizing production systems is the
-throughput i.e. the number of predictions you can make in a given amount of
-time. Here is a benchmark from the
-:ref:`sphx_glr_auto_examples_applications_plot_prediction_latency.py` example that measures
-this quantity for a number of estimators on synthetic data:
-
-.. |throughput_benchmark| image::  ../auto_examples/applications/images/sphx_glr_plot_prediction_latency_004.png
-    :target: ../auto_examples/applications/plot_prediction_latency.html
-    :scale: 80
-
-.. centered:: |throughput_benchmark|
-
-These throughputs are achieved on a single process. An obvious way to
-increase the throughput of your application is to spawn additional instances
-(usually processes in Python because of the
-`GIL <https://wiki.python.org/moin/GlobalInterpreterLock>`_) that share the
-same model. One might also add machines to spread the load. A detailed
-explanation on how to achieve this is beyond the scope of this documentation
-though.
-
-Tips and Tricks
-----------------
-
-Linear algebra libraries
-.........................
-
-As scikit-learn relies heavily on Numpy/Scipy and linear algebra in general it
-makes sense to take explicit care of the versions of these libraries.
-Basically, you ought to make sure that Numpy is built using an optimized `BLAS
-<https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms>`_ /
-`LAPACK <https://en.wikipedia.org/wiki/LAPACK>`_ library.
-
-Not all models benefit from optimized BLAS and Lapack implementations. For
-instance models based on (randomized) decision trees typically do not rely on
-BLAS calls in their inner loops, nor do kernel SVMs (``SVC``, ``SVR``,
-``NuSVC``, ``NuSVR``).  On the other hand a linear model implemented with a
-BLAS DGEMM call (via ``numpy.dot``) will typically benefit hugely from a tuned
-BLAS implementation and lead to orders of magnitude speedup over a
-non-optimized BLAS.
-
-You can display the BLAS / LAPACK implementation used by your NumPy / SciPy /
-scikit-learn install with the following commands::
-
-    from numpy.distutils.system_info import get_info
-    print(get_info('blas_opt'))
-    print(get_info('lapack_opt'))
-
-Optimized BLAS / LAPACK implementations include:
- - Atlas (need hardware specific tuning by rebuilding on the target machine)
- - OpenBLAS
- - MKL
- - Apple Accelerate and vecLib frameworks (OSX only)
-
-More information can be found on the `Scipy install page <https://docs.scipy.org/doc/numpy/user/install.html>`_
-and in this
-`blog post <http://danielnouri.org/notes/2012/12/19/libblas-and-liblapack-issues-and-speed,-with-scipy-and-ubuntu/>`_
-from Daniel Nouri which has some nice step by step install instructions for
-Debian / Ubuntu.
-
-.. _working_memory:
-
-Limiting Working Memory
-........................
-
-Some calculations when implemented using standard numpy vectorized operations
-involve using a large amount of temporary memory.  This may potentially exhaust
-system memory.  Where computations can be performed in fixed-memory chunks, we
-attempt to do so, and allow the user to hint at the maximum size of this
-working memory (defaulting to 1GB) using :func:`sklearn.set_config` or
-:func:`config_context`.  The following suggests to limit temporary working
-memory to 128 MiB::
-
-  >>> import sklearn
-  >>> with sklearn.config_context(working_memory=128):
-  ...     pass  # do chunked work here
-
-An example of a chunked operation adhering to this setting is
-:func:`metric.pairwise_distances_chunked`, which facilitates computing
-row-wise reductions of a pairwise distance matrix.
-
-Model Compression
-..................
-
-Model compression in scikit-learn only concerns linear models for the moment.
-In this context it means that we want to control the model sparsity (i.e. the
-number of non-zero coordinates in the model vectors). It is generally a good
-idea to combine model sparsity with sparse input data representation.
-
-Here is sample code that illustrates the use of the ``sparsify()`` method::
-
-    clf = SGDRegressor(penalty='elasticnet', l1_ratio=0.25)
-    clf.fit(X_train, y_train).sparsify()
-    clf.predict(X_test)
-
-In this example we prefer the ``elasticnet`` penalty as it is often a good
-compromise between model compactness and prediction power. One can also
-further tune the ``l1_ratio`` parameter (in combination with the
-regularization strength ``alpha``) to control this tradeoff.
-
-A typical `benchmark <https://github.com/scikit-learn/scikit-learn/blob/master/benchmarks/bench_sparsify.py>`_
-on synthetic data yields a >30% decrease in latency when both the model and
-input are sparse (with 0.000024 and 0.027400 non-zero coefficients ratio
-respectively). Your mileage may vary depending on the sparsity and size of
-your data and model.
-Furthermore, sparsifying can be very useful to reduce the memory usage of
-predictive models deployed on production servers.
-
-Model Reshaping
-................
-
-Model reshaping consists in selecting only a portion of the available features
-to fit a model. In other words, if a model discards features during the
-learning phase we can then strip those from the input. This has several
-benefits. Firstly it reduces memory (and therefore time) overhead of the
-model itself. It also allows to discard explicit
-feature selection components in a pipeline once we know which features to
-keep from a previous run. Finally, it can help reduce processing time and I/O
-usage upstream in the data access and feature extraction layers by not
-collecting and building features that are discarded by the model. For instance
-if the raw data come from a database, it can make it possible to write simpler
-and faster queries or reduce I/O usage by making the queries return lighter
-records.
-At the moment, reshaping needs to be performed manually in scikit-learn.
-In the case of sparse input (particularly in ``CSR`` format), it is generally
-sufficient to not generate the relevant features, leaving their columns empty.
-
-Links
-......
-
-  - :ref:`scikit-learn developer performance documentation <performance-howto>`
-  - `Scipy sparse matrix formats documentation <https://docs.scipy.org/doc/scipy/reference/sparse.html>`_
-
-Parallelism, resource management, and configuration
-===================================================
-
-.. _parallelism:
-
-Parallelism
------------
-
-Some scikit-learn estimators and utilities can parallelize costly operations
-using multiple CPU cores, thanks to the following components:
-
-- via the `joblib <https://joblib.readthedocs.io/en/latest/>`_ library. In
-  this case the number of threads or processes can be controlled with the
-  ``n_jobs`` parameter.
-- via OpenMP, used in C or Cython code.
-
-In addition, some of the numpy routines that are used internally by
-scikit-learn may also be parallelized if numpy is installed with specific
-numerical libraries such as MKL, OpenBLAS, or BLIS.
-
-We describe these 3 scenarios in the following subsections.
-
-Joblib-based parallelism
-........................
-
-When the underlying implementation uses joblib, the number of workers
-(threads or processes) that are spawned in parallel can be controlled via the
-``n_jobs`` parameter.
-
-.. note::
-
-    Where (and how) parallelization happens in the estimators is currently
-    poorly documented. Please help us by improving our docs and tackle `issue
-    14228 <https://github.com/scikit-learn/scikit-learn/issues/14228>`_!
-
-Joblib is able to support both multi-processing and multi-threading. Whether
-joblib chooses to spawn a thread or a process depends on the **backend**
-that it's using.
-
-Scikit-learn generally relies on the ``loky`` backend, which is joblib's
-default backend. Loky is a multi-processing backend. When doing
-multi-processing, in order to avoid duplicating the memory in each process
-(which isn't reasonable with big datasets), joblib will create a `memmap
-<https://docs.scipy.org/doc/numpy/reference/generated/numpy.memmap.html>`_
-that all processes can share, when the data is bigger than 1MB.
-
-In some specific cases (when the code that is run in parallel releases the
-GIL), scikit-learn will indicate to ``joblib`` that a multi-threading
-backend is preferable.
-
-As a user, you may control the backend that joblib will use (regardless of
-what scikit-learn recommends) by using a context manager::
-
-    from joblib import parallel_backend
-
-    with parallel_backend('threading', n_jobs=2):
-        # Your scikit-learn code here
-
-Please refer to the `joblib's docs
-<https://joblib.readthedocs.io/en/latest/parallel.html#thread-based-parallelism-vs-process-based-parallelism>`_
-for more details.
-
-In practice, whether parallelism is helpful at improving runtime depends on
-many factors. It is usually a good idea to experiment rather than assuming
-that increasing the number of workers is always a good thing. In some cases
-it can be highly detrimental to performance to run multiple copies of some
-estimators or functions in parallel (see oversubscription below).
-
-OpenMP-based parallelism
-........................
-
-OpenMP is used to parallelize code written in Cython or C, relying on
-multi-threading exclusively. By default (and unless joblib is trying to
-avoid oversubscription), the implementation will use as many threads as
-possible.
-
-You can control the exact number of threads that are used via the
-``OMP_NUM_THREADS`` environment variable::
-
-    OMP_NUM_THREADS=4 python my_script.py
-
-Parallel Numpy routines from numerical libraries
-................................................
-
-Scikit-learn relies heavily on NumPy and SciPy, which internally call
-multi-threaded linear algebra routines implemented in libraries such as MKL,
-OpenBLAS or BLIS.
-
-The number of threads used by the OpenBLAS, MKL or BLIS libraries can be set
-via the ``MKL_NUM_THREADS``, ``OPENBLAS_NUM_THREADS``, and
-``BLIS_NUM_THREADS`` environment variables.
-
-Please note that scikit-learn has no direct control over these
-implementations. Scikit-learn solely relies on Numpy and Scipy.
-
-.. note::
-    At the time of writing (2019), NumPy and SciPy packages distributed on
-    pypi.org (used by ``pip``) and on the conda-forge channel are linked
-    with OpenBLAS, while conda packages shipped on the "defaults" channel
-    from anaconda.org are linked by default with MKL.
-
-
-Oversubscription: spawning too many threads
-...........................................
-
-It is generally recommended to avoid using significantly more processes or
-threads than the number of CPUs on a machine. Over-subscription happens when
-a program is running too many threads at the same time.
-
-Suppose you have a machine with 8 CPUs. Consider a case where you're running
-a :class:`~GridSearchCV` (parallelized with joblib) with ``n_jobs=8`` over
-a :class:`~HistGradientBoostingClassifier` (parallelized with OpenMP). Each
-instance of :class:`~HistGradientBoostingClassifier` will spawn 8 threads
-(since you have 8 CPUs). That's a total of ``8 * 8 = 64`` threads, which
-leads to oversubscription of physical CPU resources and to scheduling
-overhead.
-
-Oversubscription can arise in the exact same fashion with parallelized
-routines from MKL, OpenBLAS or BLIS that are nested in joblib calls.
-
-Starting from ``joblib >= 0.14``, when the ``loky`` backend is used (which
-is the default), joblib will tell its child **processes** to limit the
-number of threads they can use, so as to avoid oversubscription. In practice
-the heuristic that joblib uses is to tell the processes to use ``max_threads
-= n_cpus // n_jobs``, via their corresponding environment variable. Back to
-our example from above, since the joblib backend of :class:`~GridSearchCV`
-is ``loky``, each process will only be able to use 1 thread instead of 8,
-thus mitigating the oversubscription issue.
-
-Note that:
-
-- Manually setting one of the environment variables (``OMP_NUM_THREADS``,
-  ``MKL_NUM_THREADS``, ``OPENBLAS_NUM_THREADS``, or ``BLIS_NUM_THREADS``)
-  will take precedence over what joblib tries to do. The total number of
-  threads will be ``n_jobs * <LIB>_NUM_THREADS``. Note that setting this
-  limit will also impact your computations in the main process, which will
-  only use ``<LIB>_NUM_THREADS``. Joblib exposes a context manager for
-  finer control over the number of threads in its workers (see joblib docs
-  linked below).
-- Joblib is currently unable to avoid oversubscription in a
-  multi-threading context. It can only do so with the ``loky`` backend
-  (which spawns processes).
-
-You will find additional details about joblib mitigation of oversubscription
-in `joblib documentation
-<https://joblib.readthedocs.io/en/latest/parallel.html#avoiding-over-subscription-of-cpu-ressources>`_.
-
-
-Configuration switches
------------------------
-
-Python runtime
-..............
-
-:func:`sklearn.set_config` controls the following behaviors:
-
-:assume_finite:
-
-    used to skip validation, which enables faster computations but may
-    lead to segmentation faults if the data contains NaNs.
-
-:working_memory:
-
-    the optimal size of temporary arrays used by some algorithms.
-
-.. _environment_variable:
-
-Environment variables
-......................
-
-These environment variables should be set before importing scikit-learn.
-
-:SKLEARN_SITE_JOBLIB:
-
-    When this environment variable is set to a non zero value,
-    scikit-learn uses the site joblib rather than its vendored version.
-    Consequently, joblib must be installed for scikit-learn to run.
-    Note that using the site joblib is at your own risks: the versions of
-    scikit-learn and joblib need to be compatible. Currently, joblib 0.11+
-    is supported. In addition, dumps from joblib.Memory might be incompatible,
-    and you might loose some caches and have to redownload some datasets.
-
-    .. deprecated:: 0.21
-
-       As of version 0.21 this parameter has no effect, vendored joblib was
-       removed and site joblib is always used.
-
-:SKLEARN_ASSUME_FINITE:
-
-    Sets the default value for the `assume_finite` argument of
-    :func:`sklearn.set_config`.
-
-:SKLEARN_WORKING_MEMORY:
-
-    Sets the default value for the `working_memory` argument of
-    :func:`sklearn.set_config`.
-
-:SKLEARN_SEED:
-
-    Sets the seed of the global random generator when running the tests,
-    for reproducibility.
-
-:SKLEARN_SKIP_NETWORK_TESTS:
-
-    When this environment variable is set to a non zero value, the tests
-    that need network access are skipped.
diff -pruN 0.23.2-5/doc/modules/covariance.rst 1.1.1-1/doc/modules/covariance.rst
--- 0.23.2-5/doc/modules/covariance.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/covariance.rst	2022-05-19 12:16:26.448782400 +0000
@@ -27,8 +27,8 @@ by the classical *maximum likelihood est
 covariance"), provided the number of observations is large enough
 compared to the number of features (the variables describing the
 observations). More precisely, the Maximum Likelihood Estimator of a
-sample is an unbiased estimator of the corresponding population's
-covariance matrix.
+sample is an asymptotically unbiased estimator of the corresponding
+population's covariance matrix.
 
 The empirical covariance matrix of a sample can be computed using the
 :func:`empirical_covariance` function of the package, or by fitting an
@@ -55,8 +55,8 @@ Shrunk Covariance
 Basic shrinkage
 ---------------
 
-Despite being an unbiased estimator of the covariance matrix, the
-Maximum Likelihood Estimator is not a good estimator of the
+Despite being an asymptotically unbiased estimator of the covariance matrix,
+the Maximum Likelihood Estimator is not a good estimator of the
 eigenvalues of the covariance matrix, so the precision matrix obtained
 from its inversion is not accurate. Sometimes, it even occurs that the
 empirical covariance matrix cannot be inverted for numerical
diff -pruN 0.23.2-5/doc/modules/cross_decomposition.rst 1.1.1-1/doc/modules/cross_decomposition.rst
--- 0.23.2-5/doc/modules/cross_decomposition.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/cross_decomposition.rst	2022-05-19 12:16:26.448782400 +0000
@@ -6,12 +6,9 @@ Cross decomposition
 
 .. currentmodule:: sklearn.cross_decomposition
 
-The cross decomposition module contains two main families of algorithms: the
-partial least squares (PLS) and the canonical correlation analysis (CCA).
-
-These families of algorithms are useful to find linear relations between two
-multivariate datasets: the ``X`` and ``Y`` arguments of the ``fit`` method
-are 2D arrays.
+The cross decomposition module contains **supervised** estimators for
+dimensionality reduction and regression, belonging to the "Partial Least
+Squares" family.
 
 .. figure:: ../auto_examples/cross_decomposition/images/sphx_glr_plot_compare_cross_decomposition_001.png
    :target: ../auto_examples/cross_decomposition/plot_compare_cross_decomposition.html
@@ -23,20 +20,175 @@ Cross decomposition algorithms find the
 matrices (X and Y). They are latent variable approaches to modeling the
 covariance structures in these two spaces. They will try to find the
 multidimensional direction in the X space that explains the maximum
-multidimensional variance direction in the Y space. PLS-regression is
-particularly suited when the matrix of predictors has more variables than
-observations, and when there is multicollinearity among X values. By contrast,
-standard regression will fail in these cases.
+multidimensional variance direction in the Y space. In other words, PLS
+projects both `X` and `Y` into a lower-dimensional subspace such that the
+covariance between `transformed(X)` and `transformed(Y)` is maximal.
+
+PLS draws similarities with `Principal Component Regression
+<https://en.wikipedia.org/wiki/Principal_component_regression>`_ (PCR), where
+the samples are first projected into a lower-dimensional subspace, and the
+targets `y` are predicted using `transformed(X)`. One issue with PCR is that
+the dimensionality reduction is unsupervised, and may lose some important
+variables: PCR would keep the features with the most variance, but it's
+possible that features with small variances are relevant for predicting
+the target. In a way, PLS allows for the same kind of dimensionality
+reduction, but by taking into account the targets `y`. An illustration of
+this fact is given in the following example:
+* :ref:`sphx_glr_auto_examples_cross_decomposition_plot_pcr_vs_pls.py`.
+
+Apart from CCA, the PLS estimators are particularly suited when the matrix of
+predictors has more variables than observations, and when there is
+multicollinearity among the features. By contrast, standard linear regression
+would fail in these cases unless it is regularized.
 
-Classes included in this module are :class:`PLSRegression`
+Classes included in this module are :class:`PLSRegression`,
 :class:`PLSCanonical`, :class:`CCA` and :class:`PLSSVD`
 
+PLSCanonical
+------------
+
+We here describe the algorithm used in :class:`PLSCanonical`. The other
+estimators use variants of this algorithm, and are detailed below.
+We recommend section [1]_ for more details and comparisons between these
+algorithms. In [1]_, :class:`PLSCanonical` corresponds to "PLSW2A".
+
+Given two centered matrices :math:`X \in \mathbb{R}^{n \times d}` and
+:math:`Y \in \mathbb{R}^{n \times t}`, and a number of components :math:`K`,
+:class:`PLSCanonical` proceeds as follows:
+
+Set :math:`X_1` to :math:`X` and :math:`Y_1` to :math:`Y`. Then, for each
+:math:`k \in [1, K]`:
+
+- a) compute :math:`u_k \in \mathbb{R}^d` and :math:`v_k \in \mathbb{R}^t`,
+  the first left and right singular vectors of the cross-covariance matrix
+  :math:`C = X_k^T Y_k`.
+  :math:`u_k` and :math:`v_k` are called the *weights*.
+  By definition, :math:`u_k` and :math:`v_k` are
+  chosen so that they maximize the covariance between the projected
+  :math:`X_k` and the projected target, that is :math:`\text{Cov}(X_k u_k,
+  Y_k v_k)`.
+- b) Project :math:`X_k` and :math:`Y_k` on the singular vectors to obtain
+  *scores*: :math:`\xi_k = X_k u_k` and :math:`\omega_k = Y_k v_k`
+- c) Regress :math:`X_k` on :math:`\xi_k`, i.e. find a vector :math:`\gamma_k
+  \in \mathbb{R}^d` such that the rank-1 matrix :math:`\xi_k \gamma_k^T`
+  is as close as possible to :math:`X_k`. Do the same on :math:`Y_k` with
+  :math:`\omega_k` to obtain :math:`\delta_k`. The vectors
+  :math:`\gamma_k` and :math:`\delta_k` are called the *loadings*.
+- d) *deflate* :math:`X_k` and :math:`Y_k`, i.e. subtract the rank-1
+  approximations: :math:`X_{k+1} = X_k - \xi_k \gamma_k^T`, and
+  :math:`Y_{k + 1} = Y_k - \omega_k \delta_k^T`.
+
+At the end, we have approximated :math:`X` as a sum of rank-1 matrices:
+:math:`X = \Xi \Gamma^T` where :math:`\Xi \in \mathbb{R}^{n \times K}`
+contains the scores in its columns, and :math:`\Gamma^T \in \mathbb{R}^{K
+\times d}` contains the loadings in its rows. Similarly for :math:`Y`, we
+have :math:`Y = \Omega \Delta^T`.
+
+Note that the scores matrices :math:`\Xi` and :math:`\Omega` correspond to
+the projections of the training data :math:`X` and :math:`Y`, respectively.
+
+Step *a)* may be performed in two ways: either by computing the whole SVD of
+:math:`C` and only retain the singular vectors with the biggest singular
+values, or by directly computing the singular vectors using the power method (cf section 11.3 in [1]_),
+which corresponds to the `'nipals'` option of the `algorithm` parameter.
+
+
+Transforming data
+^^^^^^^^^^^^^^^^^
+
+To transform :math:`X` into :math:`\bar{X}`, we need to find a projection
+matrix :math:`P` such that :math:`\bar{X} = XP`. We know that for the
+training data, :math:`\Xi = XP`, and :math:`X = \Xi \Gamma^T`. Setting
+:math:`P = U(\Gamma^T U)^{-1}` where :math:`U` is the matrix with the
+:math:`u_k` in the columns, we have :math:`XP = X U(\Gamma^T U)^{-1} = \Xi
+(\Gamma^T U) (\Gamma^T U)^{-1} = \Xi` as desired. The rotation matrix
+:math:`P` can be accessed from the `x_rotations_` attribute.
+
+Similarly, :math:`Y` can be transformed using the rotation matrix
+:math:`V(\Delta^T V)^{-1}`, accessed via the `y_rotations_` attribute.
+
+Predicting the targets Y
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+To predict the targets of some data :math:`X`, we are looking for a
+coefficient matrix :math:`\beta \in \mathbb{R}^{d \times t}` such that :math:`Y =
+X\beta`.
+
+The idea is to try to predict the transformed targets :math:`\Omega` as a
+function of the transformed samples :math:`\Xi`, by computing :math:`\alpha
+\in \mathbb{R}` such that :math:`\Omega = \alpha \Xi`.
+
+Then, we have :math:`Y = \Omega \Delta^T = \alpha \Xi \Delta^T`, and since
+:math:`\Xi` is the transformed training data we have that :math:`Y = X \alpha
+P \Delta^T`, and as a result the coefficient matrix :math:`\beta = \alpha P
+\Delta^T`.
+
+:math:`\beta` can be accessed through the `coef_` attribute.
+
+PLSSVD
+------
+
+:class:`PLSSVD` is a simplified version of :class:`PLSCanonical`
+described earlier: instead of iteratively deflating the matrices :math:`X_k`
+and :math:`Y_k`, :class:`PLSSVD` computes the SVD of :math:`C = X^TY`
+only *once*, and stores the `n_components` singular vectors corresponding to
+the biggest singular values in the matrices `U` and `V`, corresponding to the
+`x_weights_` and `y_weights_` attributes. Here, the transformed data is
+simply `transformed(X) = XU` and `transformed(Y) = YV`.
+
+If `n_components == 1`, :class:`PLSSVD` and :class:`PLSCanonical` are
+strictly equivalent.
+
+PLSRegression
+-------------
+
+The :class:`PLSRegression` estimator is similar to
+:class:`PLSCanonical` with `algorithm='nipals'`, with 2 significant
+differences:
+
+- at step a) in the power method to compute :math:`u_k` and :math:`v_k`,
+  :math:`v_k` is never normalized.
+- at step c), the targets :math:`Y_k` are approximated using the projection
+  of :math:`X_k` (i.e. :math:`\xi_k`) instead of the projection of
+  :math:`Y_k` (i.e. :math:`\omega_k`). In other words, the loadings
+  computation is different. As a result, the deflation in step d) will also
+  be affected.
+
+These two modifications affect the output of `predict` and `transform`,
+which are not the same as for :class:`PLSCanonical`. Also, while the number
+of components is limited by `min(n_samples, n_features, n_targets)` in
+:class:`PLSCanonical`, here the limit is the rank of :math:`X^TX`, i.e.
+`min(n_samples, n_features)`.
+
+:class:`PLSRegression` is also known as PLS1 (single targets) and PLS2
+(multiple targets). Much like :class:`~sklearn.linear_model.Lasso`,
+:class:`PLSRegression` is a form of regularized linear regression where the
+number of components controls the strength of the regularization.
+
+Canonical Correlation Analysis
+------------------------------
+
+Canonical Correlation Analysis was developed prior to, and independently of, PLS.
+But it turns out that :class:`CCA` is a special case of PLS, and corresponds
+to PLS in "Mode B" in the literature.
+
+:class:`CCA` differs from :class:`PLSCanonical` in the way the weights
+:math:`u_k` and :math:`v_k` are computed in the power method of step a).
+Details can be found in section 10 of [1]_.
+
+Since :class:`CCA` involves the inversion of :math:`X_k^TX_k` and
+:math:`Y_k^TY_k`, this estimator can be unstable if the number of features or
+targets is greater than the number of samples.
+
 
 .. topic:: Reference:
 
-   * JA Wegelin
-     `A survey of Partial Least Squares (PLS) methods, with emphasis on the two-block case <https://www.stat.washington.edu/research/reports/2000/tr371.pdf>`_
+   .. [1] `A survey of Partial Least Squares (PLS) methods, with emphasis on
+      the two-block case
+      <https://www.stat.washington.edu/research/reports/2000/tr371.pdf>`_
+      JA Wegelin
 
 .. topic:: Examples:
 
     * :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`
+    * :ref:`sphx_glr_auto_examples_cross_decomposition_plot_pcr_vs_pls.py`
diff -pruN 0.23.2-5/doc/modules/cross_validation.rst 1.1.1-1/doc/modules/cross_validation.rst
--- 0.23.2-5/doc/modules/cross_validation.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/cross_validation.rst	2022-05-19 12:16:26.448782400 +0000
@@ -116,16 +116,15 @@ a model and computing the score 5 consec
 time)::
 
   >>> from sklearn.model_selection import cross_val_score
-  >>> clf = svm.SVC(kernel='linear', C=1)
+  >>> clf = svm.SVC(kernel='linear', C=1, random_state=42)
   >>> scores = cross_val_score(clf, X, y, cv=5)
   >>> scores
-  array([0.96..., 1.  ..., 0.96..., 0.96..., 1.        ])
+  array([0.96..., 1. , 0.96..., 0.96..., 1. ])
 
-The mean score and the 95\% confidence interval of the score estimate are hence
-given by::
+The mean score and the standard deviation are hence given by::
 
-  >>> print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
-  Accuracy: 0.98 (+/- 0.03)
+  >>> print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))
+  0.98 accuracy with a standard deviation of 0.02
 
 By default, the score computed at each CV iteration is the ``score``
 method of the estimator. It is possible to change this by using the
@@ -220,7 +219,7 @@ following keys -
 ``['test_<scorer1_name>', 'test_<scorer2_name>', 'test_<scorer...>', 'fit_time', 'score_time']``
 
 ``return_train_score`` is set to ``False`` by default to save computation time.
-To evaluate the scores on the training set as well you need to be set to
+To evaluate the scores on the training set as well you need to set it to
 ``True``.
 
 You may also retain the estimator fitted on each training set by setting
@@ -279,7 +278,7 @@ can be used (otherwise, an exception is
     over cross-validation folds, whereas :func:`cross_val_predict` simply
     returns the labels (or probabilities) from several distinct models
     undistinguished. Thus, :func:`cross_val_predict` is not an appropriate
-    measure of generalisation error.
+    measure of generalization error.
 
 
 The function :func:`cross_val_predict` is appropriate for:
@@ -319,16 +318,17 @@ samples.
 
 The following cross-validators can be used in such cases.
 
-**NOTE**
+.. note::
 
-While i.i.d. data is a common assumption in machine learning theory, it rarely
-holds in practice. If one knows that the samples have been generated using a
-time-dependent process, it is safer to
-use a :ref:`time-series aware cross-validation scheme <timeseries_cv>`.
-Similarly, if we know that the generative process has a group structure
-(samples collected from different subjects, experiments, measurement
-devices), it is safer to use :ref:`group-wise cross-validation <group_cv>`.
+  While i.i.d. data is a common assumption in machine learning theory, it rarely
+  holds in practice. If one knows that the samples have been generated using a
+  time-dependent process, it is safer to
+  use a :ref:`time-series aware cross-validation scheme <timeseries_cv>`.
+  Similarly, if we know that the generative process has a group structure
+  (samples collected from different subjects, experiments, measurement
+  devices), it is safer to use :ref:`group-wise cross-validation <group_cv>`.
 
+.. _k_fold:
 
 K-fold
 ^^^^^^
@@ -353,7 +353,7 @@ Example of 2-fold cross-validation on a
 Here is a visualization of the cross-validation behavior. Note that
 :class:`KFold` is not affected by classes or groups.
 
-.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_004.png
+.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_006.png
    :target: ../auto_examples/model_selection/plot_cv_indices.html
    :align: center
    :scale: 75%
@@ -366,6 +366,7 @@ Thus, one can create the training/test s
   >>> y = np.array([0, 1, 0, 1])
   >>> X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]
 
+.. _repeated_k_fold:
 
 Repeated K-Fold
 ^^^^^^^^^^^^^^^
@@ -393,6 +394,7 @@ Example of 2-fold K-Fold repeated 2 time
 Similarly, :class:`RepeatedStratifiedKFold` repeats Stratified K-Fold n times
 with different randomization in each repetition.
 
+.. _leave_one_out:
 
 Leave One Out (LOO)
 ^^^^^^^^^^^^^^^^^^^
@@ -445,12 +447,13 @@ fold cross validation should be preferre
  * L. Breiman, P. Spector `Submodel selection and evaluation in regression: The X-random case
    <http://digitalassets.lib.berkeley.edu/sdtr/ucb/text/197.pdf>`_, International Statistical Review 1992;
  * R. Kohavi, `A Study of Cross-Validation and Bootstrap for Accuracy Estimation and Model Selection
-   <http://web.cs.iastate.edu/~jtian/cs573/Papers/Kohavi-IJCAI-95.pdf>`_, Intl. Jnt. Conf. AI
+   <https://www.ijcai.org/Proceedings/95-2/Papers/016.pdf>`_, Intl. Jnt. Conf. AI
  * R. Bharat Rao, G. Fung, R. Rosales, `On the Dangers of Cross-Validation. An Experimental Evaluation
    <https://people.csail.mit.edu/romer/papers/CrossVal_SDM08.pdf>`_, SIAM 2008;
  * G. James, D. Witten, T. Hastie, R Tibshirani, `An Introduction to
    Statistical Learning <https://www-bcf.usc.edu/~gareth/ISL/>`_, Springer 2013.
 
+.. _leave_p_out:
 
 Leave P Out (LPO)
 ^^^^^^^^^^^^^^^^^
@@ -482,8 +485,6 @@ Example of Leave-2-Out on a dataset with
 Random permutations cross-validation a.k.a. Shuffle & Split
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-:class:`ShuffleSplit`
-
 The :class:`ShuffleSplit` iterator will generate a user defined number of
 independent train / test dataset splits. Samples are first shuffled and
 then split into a pair of train and test sets.
@@ -508,7 +509,7 @@ Here is a usage example::
 Here is a visualization of the cross-validation behavior. Note that
 :class:`ShuffleSplit` is not affected by classes or groups.
 
-.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_006.png
+.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_008.png
    :target: ../auto_examples/model_selection/plot_cv_indices.html
    :align: center
    :scale: 75%
@@ -517,6 +518,8 @@ Here is a visualization of the cross-val
 validation that allows a finer control on the number of iterations and
 the proportion of samples on each side of the train / test split.
 
+.. _stratification:
+
 Cross-validation iterators with stratification based on class labels.
 ---------------------------------------------------------------------
 
@@ -527,6 +530,8 @@ stratified sampling as implemented in :c
 :class:`StratifiedShuffleSplit` to ensure that relative class frequencies is
 approximately preserved in each train and validation fold.
 
+.. _stratified_k_fold:
+
 Stratified k-fold
 ^^^^^^^^^^^^^^^^^
 
@@ -561,7 +566,7 @@ We can see that :class:`StratifiedKFold`
 
 Here is a visualization of the cross-validation behavior.
 
-.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_007.png
+.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_009.png
    :target: ../auto_examples/model_selection/plot_cv_indices.html
    :align: center
    :scale: 75%
@@ -569,6 +574,7 @@ Here is a visualization of the cross-val
 :class:`RepeatedStratifiedKFold` can be used to repeat Stratified K-Fold n times
 with different randomization in each repetition.
 
+.. _stratified_shuffle_split:
 
 Stratified Shuffle Split
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -579,7 +585,7 @@ percentage for each target class as in t
 
 Here is a visualization of the cross-validation behavior.
 
-.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_009.png
+.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_012.png
    :target: ../auto_examples/model_selection/plot_cv_indices.html
    :align: center
    :scale: 75%
@@ -606,6 +612,7 @@ The following cross-validation splitters
 The grouping identifier for the samples is specified via the ``groups``
 parameter.
 
+.. _group_k_fold:
 
 Group k-fold
 ^^^^^^^^^^^^
@@ -638,11 +645,65 @@ size due to the imbalance in the data.
 
 Here is a visualization of the cross-validation behavior.
 
+.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_007.png
+   :target: ../auto_examples/model_selection/plot_cv_indices.html
+   :align: center
+   :scale: 75%
+
+.. _stratified_group_k_fold:
+
+StratifiedGroupKFold
+^^^^^^^^^^^^^^^^^^^^
+
+:class:`StratifiedGroupKFold` is a cross-validation scheme that combines both
+:class:`StratifiedKFold` and :class:`GroupKFold`. The idea is to try to
+preserve the distribution of classes in each split while keeping each group
+within a single split. That might be useful when you have an unbalanced
+dataset so that using just :class:`GroupKFold` might produce skewed splits.
+
+Example::
+
+  >>> from sklearn.model_selection import StratifiedGroupKFold
+  >>> X = list(range(18))
+  >>> y = [1] * 6 + [0] * 12
+  >>> groups = [1, 2, 3, 3, 4, 4, 1, 1, 2, 2, 3, 4, 5, 5, 5, 6, 6, 6]
+  >>> sgkf = StratifiedGroupKFold(n_splits=3)
+  >>> for train, test in sgkf.split(X, y, groups=groups):
+  ...     print("%s %s" % (train, test))
+  [ 0  2  3  4  5  6  7 10 11 15 16 17] [ 1  8  9 12 13 14]
+  [ 0  1  4  5  6  7  8  9 11 12 13 14] [ 2  3 10 15 16 17]
+  [ 1  2  3  8  9 10 12 13 14 15 16 17] [ 0  4  5  6  7 11]
+
+Implementation notes:
+
+- With the current implementation full shuffle is not possible in most
+  scenarios. When shuffle=True, the following happens:
+
+  1. All groups are shuffled.
+  2. Groups are sorted by standard deviation of classes using stable sort.
+  3. Sorted groups are iterated over and assigned to folds.
+
+  That means that only groups with the same standard deviation of class
+  distribution will be shuffled, which might be useful when each group has only
+  a single class.
+- The algorithm greedily assigns each group to one of n_splits test sets,
+  choosing the test set that minimizes the variance in class distribution
+  across test sets. Group assignment proceeds from groups with highest to
+  lowest variance in class frequency, i.e. large groups peaked on one or few
+  classes are assigned first.
+- This split is suboptimal in a sense that it might produce imbalanced splits
+  even if perfect stratification is possible. If you have relatively close
+  distribution of classes in each group, using :class:`GroupKFold` is better.
+
+Here is a visualization of cross-validation behavior for uneven groups:
+
 .. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_005.png
    :target: ../auto_examples/model_selection/plot_cv_indices.html
    :align: center
    :scale: 75%
 
+.. _leave_one_group_out:
+
 Leave One Group Out
 ^^^^^^^^^^^^^^^^^^^
 
@@ -674,6 +735,8 @@ Another common application is to use tim
 groups could be the year of collection of the samples and thus allow
 for cross-validation against time-based splits.
 
+.. _leave_p_groups_out:
+
 Leave P Groups Out
 ^^^^^^^^^^^^^^^^^^
 
@@ -694,6 +757,8 @@ Example of Leave-2-Group Out::
   [2 3] [0 1 4 5]
   [0 1] [2 3 4 5]
 
+.. _group_shuffle_split:
+
 Group Shuffle Split
 ^^^^^^^^^^^^^^^^^^^
 
@@ -720,7 +785,7 @@ Here is a usage example::
 
 Here is a visualization of the cross-validation behavior.
 
-.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_008.png
+.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_011.png
    :target: ../auto_examples/model_selection/plot_cv_indices.html
    :align: center
    :scale: 75%
@@ -728,10 +793,11 @@ Here is a visualization of the cross-val
 This class is useful when the behavior of :class:`LeavePGroupsOut` is
 desired, but the number of groups is large enough that generating all
 possible partitions with :math:`P` groups withheld would be prohibitively
-expensive.  In such a scenario, :class:`GroupShuffleSplit` provides
+expensive. In such a scenario, :class:`GroupShuffleSplit` provides
 a random sample (with replacement) of the train / test splits
 generated by :class:`LeavePGroupsOut`.
 
+.. _predefined_split:
 
 Predefined Fold-Splits / Validation-Sets
 ----------------------------------------
@@ -744,22 +810,52 @@ e.g. when searching for hyperparameters.
 For example, when using a validation set, set the ``test_fold`` to 0 for all
 samples that are part of the validation set, and to -1 for all other samples.
 
+Using cross-validation iterators to split train and test
+--------------------------------------------------------
+
+The above group cross-validation functions may also be useful for splitting a
+dataset into training and testing subsets. Note that the convenience
+function :func:`train_test_split` is a wrapper around :func:`ShuffleSplit`
+and thus only allows for stratified splitting (using the class labels)
+and cannot account for groups.
+
+To perform the train and test split, use the indices for the train and test
+subsets yielded by the generator output by the `split()` method of the
+cross-validation splitter. For example::
+
+  >>> import numpy as np
+  >>> from sklearn.model_selection import GroupShuffleSplit
+
+  >>> X = np.array([0.1, 0.2, 2.2, 2.4, 2.3, 4.55, 5.8, 0.001])
+  >>> y = np.array(["a", "b", "b", "b", "c", "c", "c", "a"])
+  >>> groups = np.array([1, 1, 2, 2, 3, 3, 4, 4])
+  >>> train_indx, test_indx = next(
+  ...     GroupShuffleSplit(random_state=7).split(X, y, groups)
+  ... )
+  >>> X_train, X_test, y_train, y_test = \
+  ...     X[train_indx], X[test_indx], y[train_indx], y[test_indx]
+  >>> X_train.shape, X_test.shape
+  ((6,), (2,))
+  >>> np.unique(groups[train_indx]), np.unique(groups[test_indx])
+  (array([1, 2, 4]), array([3]))
+
 .. _timeseries_cv:
 
 Cross validation of time series data
 ------------------------------------
 
-Time series data is characterised by the correlation between observations
+Time series data is characterized by the correlation between observations
 that are near in time (*autocorrelation*). However, classical
 cross-validation techniques such as :class:`KFold` and
 :class:`ShuffleSplit` assume the samples are independent and
 identically distributed, and would result in unreasonable correlation
 between training and testing instances (yielding poor estimates of
-generalisation error) on time series data. Therefore, it is very important
+generalization error) on time series data. Therefore, it is very important
 to evaluate our model for time series data on the "future" observations
 least like those that are used to train the model. To achieve this, one
 solution is provided by :class:`TimeSeriesSplit`.
 
+.. _time_series_split:
 
 Time Series Split
 ^^^^^^^^^^^^^^^^^
@@ -782,7 +878,7 @@ Example of 3-split time series cross-val
   >>> y = np.array([1, 2, 3, 4, 5, 6])
   >>> tscv = TimeSeriesSplit(n_splits=3)
   >>> print(tscv)
-  TimeSeriesSplit(max_train_size=None, n_splits=3)
+  TimeSeriesSplit(gap=0, max_train_size=None, n_splits=3, test_size=None)
   >>> for train, test in tscv.split(X):
   ...     print("%s %s" % (train, test))
   [0 1 2] [3]
@@ -791,7 +887,7 @@ Example of 3-split time series cross-val
 
 Here is a visualization of the cross-validation behavior.
 
-.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_010.png
+.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_013.png
    :target: ../auto_examples/model_selection/plot_cv_indices.html
    :align: center
    :scale: 75%
@@ -822,9 +918,72 @@ to shuffle the data indices before split
   of parameters validated by a single call to its ``fit`` method.
 * To get identical results for each split, set ``random_state`` to an integer.
 
+For more details on how to control the randomness of cv splitters and avoid
+common pitfalls, see :ref:`randomness`.
+
 Cross validation and model selection
 ====================================
 
 Cross validation iterators can also be used to directly perform model
 selection using Grid Search for the optimal hyperparameters of the
 model. This is the topic of the next section: :ref:`grid_search`.
+
+.. _permutation_test_score:
+
+Permutation test score
+======================
+
+:func:`~sklearn.model_selection.permutation_test_score` offers another way
+to evaluate the performance of classifiers. It provides a permutation-based
+p-value, which represents how likely an observed performance of the
+classifier would be obtained by chance. The null hypothesis in this test is
+that the classifier fails to leverage any statistical dependency between the
+features and the labels to make correct predictions on left out data.
+:func:`~sklearn.model_selection.permutation_test_score` generates a null
+distribution by calculating ``n_permutations`` different permutations of the
+data. In each permutation the labels are randomly shuffled, thereby removing
+any dependency between the features and the labels. The p-value output
+is the fraction of permutations for which the average cross-validation score
+obtained by the model is better than the cross-validation score obtained by
+the model using the original data. For reliable results ``n_permutations``
+should typically be larger than 100 and ``cv`` between 3-10 folds.
+
+A low p-value provides evidence that the dataset contains real dependency
+between features and labels and the classifier was able to utilize this
+to obtain good results. A high p-value could be due to a lack of dependency
+between features and labels (there is no difference in feature values between
+the classes) or because the classifier was not able to use the dependency in
+the data. In the latter case, using a more appropriate classifier that
+is able to utilize the structure in the data, would result in a lower
+p-value.
+
+Cross-validation provides information about how well a classifier generalizes,
+specifically the range of expected errors of the classifier. However, a
+classifier trained on a high dimensional dataset with no structure may still
+perform better than expected on cross-validation, just by chance.
+This can typically happen with small datasets with fewer than a few hundred
+samples.
+:func:`~sklearn.model_selection.permutation_test_score` provides information
+on whether the classifier has found a real class structure and can help in
+evaluating the performance of the classifier.
+
+It is important to note that this test has been shown to produce low
+p-values even if there is only weak structure in the data because in the
+corresponding permuted datasets there is absolutely no structure. This
+test is therefore only able to show when the model reliably outperforms
+random guessing.
+
+Finally, :func:`~sklearn.model_selection.permutation_test_score` is computed
+using brute force and internally fits ``(n_permutations + 1) * n_cv`` models.
+It is therefore only tractable with small datasets for which fitting an
+individual model is very fast.
+
+.. topic:: Examples:
+
+    * :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py`
+
+.. topic:: References:
+
+    * Ojala and Garriga. `Permutation Tests for Studying Classifier Performance
+      <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_.
+      J. Mach. Learn. Res. 2010.
diff -pruN 0.23.2-5/doc/modules/decomposition.rst 1.1.1-1/doc/modules/decomposition.rst
--- 0.23.2-5/doc/modules/decomposition.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/decomposition.rst	2022-05-19 12:16:26.448782400 +0000
@@ -166,32 +166,13 @@ Note: the implementation of ``inverse_tr
 
 .. topic:: References:
 
-    * `"Finding structure with randomness: Stochastic algorithms for
-      constructing approximate matrix decompositions"
-      <https://arxiv.org/abs/0909.4061>`_
+    * Algorithm 4.3 in
+      :arxiv:`"Finding structure with randomness: Stochastic algorithms for
+      constructing approximate matrix decompositions" <0909.4061>`
       Halko, et al., 2009
 
-
-.. _kernel_PCA:
-
-Kernel PCA
-----------
-
-:class:`KernelPCA` is an extension of PCA which achieves non-linear
-dimensionality reduction through the use of kernels (see :ref:`metrics`). It
-has many applications including denoising, compression and structured
-prediction (kernel dependency estimation). :class:`KernelPCA` supports both
-``transform`` and ``inverse_transform``.
-
-.. figure:: ../auto_examples/decomposition/images/sphx_glr_plot_kernel_pca_001.png
-    :target: ../auto_examples/decomposition/plot_kernel_pca.html
-    :align: center
-    :scale: 75%
-
-.. topic:: Examples:
-
-    * :ref:`sphx_glr_auto_examples_decomposition_plot_kernel_pca.py`
-
+    * :arxiv:`"An implementation of a randomized algorithm for principal component
+      analysis" <1412.3510>` A. Szlam et al. 2014
 
 .. _SparsePCA:
 
@@ -246,12 +227,14 @@ problem solved is a PCA problem (diction
 
 .. math::
    (U^*, V^*) = \underset{U, V}{\operatorname{arg\,min\,}} & \frac{1}{2}
-                ||X-UV||_2^2+\alpha||V||_1 \\
-                \text{subject to } & ||U_k||_2 = 1 \text{ for all }
+                ||X-UV||_{\text{Fro}}^2+\alpha||V||_{1,1} \\
+                \text{subject to } & ||U_k||_2 \leq 1 \text{ for all }
                 0 \leq k < n_{components}
 
-
-The sparsity-inducing :math:`\ell_1` norm also prevents learning
+:math:`||.||_{\text{Fro}}` stands for the Frobenius norm and :math:`||.||_{1,1}`
+stands for the entry-wise matrix norm which is the sum of the absolute values
+of all the entries in the matrix.
+The sparsity-inducing :math:`||.||_{1,1}` matrix norm also prevents learning
 components from noise when few training samples are available. The degree
 of penalization (and thus sparsity) can be adjusted through the
 hyperparameter ``alpha``. Small values lead to a gently regularized
@@ -278,6 +261,111 @@ factorization, while larger values shrin
      R. Jenatton, G. Obozinski, F. Bach, 2009
 
 
+.. _kernel_PCA:
+
+Kernel Principal Component Analysis (kPCA)
+==========================================
+
+Exact Kernel PCA
+----------------
+
+:class:`KernelPCA` is an extension of PCA which achieves non-linear
+dimensionality reduction through the use of kernels (see :ref:`metrics`) [Scholkopf1997]_. It
+has many applications including denoising, compression and structured
+prediction (kernel dependency estimation). :class:`KernelPCA` supports both
+``transform`` and ``inverse_transform``.
+
+.. figure:: ../auto_examples/decomposition/images/sphx_glr_plot_kernel_pca_002.png
+    :target: ../auto_examples/decomposition/plot_kernel_pca.html
+    :align: center
+    :scale: 75%
+
+.. note::
+    :meth:`KernelPCA.inverse_transform` relies on a kernel ridge to learn the
+    function mapping samples from the PCA basis into the original feature
+    space [Bakir2004]_. Thus, the reconstruction obtained with
+    :meth:`KernelPCA.inverse_transform` is an approximation. See the example
+    linked below for more details.
+
+.. topic:: Examples:
+
+    * :ref:`sphx_glr_auto_examples_decomposition_plot_kernel_pca.py`
+
+.. topic:: References:
+
+    .. [Scholkopf1997] Schölkopf, Bernhard, Alexander Smola, and Klaus-Robert Müller.
+       `"Kernel principal component analysis."
+       <https://people.eecs.berkeley.edu/~wainwrig/stat241b/scholkopf_kernel.pdf>`_
+       International conference on artificial neural networks.
+       Springer, Berlin, Heidelberg, 1997.
+
+    .. [Bakir2004] Bakır, Gökhan H., Jason Weston, and Bernhard Schölkopf.
+       `"Learning to find pre-images."
+       <https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.68.5164&rep=rep1&type=pdf>`_
+       Advances in neural information processing systems 16 (2004): 449-456.
+
+.. _kPCA_Solvers:
+
+Choice of solver for Kernel PCA
+-------------------------------
+
+While in :class:`PCA` the number of components is bounded by the number of
+features, in :class:`KernelPCA` the number of components is bounded by the
+number of samples. Many real-world datasets have a large number of samples! In
+these cases finding *all* the components with a full kPCA is a waste of
+computation time, as data is mostly described by the first few components
+(e.g. ``n_components<=100``). In other words, the centered Gram matrix that
+is eigendecomposed in the Kernel PCA fitting process has an effective rank that
+is much smaller than its size. This is a situation where approximate
+eigensolvers can provide speedup with very low precision loss.
+
+The optional parameter ``eigen_solver='randomized'`` can be used to
+*significantly* reduce the computation time when the number of requested
+``n_components`` is small compared with the number of samples. It relies on
+randomized decomposition methods to find an approximate solution in a shorter
+time.
+
+The time complexity of the randomized :class:`KernelPCA` is
+:math:`O(n_{\mathrm{samples}}^2 \cdot n_{\mathrm{components}})`
+instead of :math:`O(n_{\mathrm{samples}}^3)` for the exact method
+implemented with ``eigen_solver='dense'``.
+
+The memory footprint of randomized :class:`KernelPCA` is also proportional to
+:math:`2 \cdot n_{\mathrm{samples}} \cdot n_{\mathrm{components}}` instead of
+:math:`n_{\mathrm{samples}}^2` for the exact method.
+
+Note: this technique is the same as in :ref:`RandomizedPCA`.
+
+In addition to the above two solvers, ``eigen_solver='arpack'`` can be used as
+an alternate way to get an approximate decomposition. In practice, this method
+only provides reasonable execution times when the number of components to find
+is extremely small. It is enabled by default when the desired number of
+components is less than 10 (strict) and the number of samples is more than 200
+(strict). See :class:`KernelPCA` for details.
+
+.. topic:: References:
+
+    * *dense* solver:
+      `scipy.linalg.eigh documentation
+      <https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.eigh.html>`_
+
+    * *randomized* solver:
+
+        * Algorithm 4.3 in
+          :arxiv:`"Finding structure with randomness: Stochastic
+          algorithms for constructing approximate matrix decompositions" <0909.4061>`
+          Halko, et al. (2009)
+
+        * :arxiv:`"An implementation of a randomized algorithm
+          for principal component analysis" <1412.3510>`
+          A. Szlam et al. (2014)
+
+    * *arpack* solver:
+      `scipy.sparse.linalg.eigsh documentation
+      <https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.eigsh.html>`_
+      R. B. Lehoucq, D. C. Sorensen, and C. Yang, (1998)
+
+
 .. _LSA:
 
 Truncated singular value decomposition and latent semantic analysis
@@ -310,7 +398,7 @@ produces a low-rank approximation :math:
 .. math::
     X \approx X_k = U_k \Sigma_k V_k^\top
 
-After this operation, :math:`U_k \Sigma_k^\top`
+After this operation, :math:`U_k \Sigma_k`
 is the transformed training set with :math:`k` features
 (called ``n_components`` in the API).
 
@@ -432,8 +520,8 @@ dictionary fixed, and then updating the
 
 .. math::
    (U^*, V^*) = \underset{U, V}{\operatorname{arg\,min\,}} & \frac{1}{2}
-                ||X-UV||_2^2+\alpha||U||_1 \\
-                \text{subject to } & ||V_k||_2 = 1 \text{ for all }
+                ||X-UV||_{\text{Fro}}^2+\alpha||U||_{1,1} \\
+                \text{subject to } & ||V_k||_2 \leq 1 \text{ for all }
                 0 \leq k < n_{\mathrm{atoms}}
 
 
@@ -441,13 +529,15 @@ dictionary fixed, and then updating the
    :target: ../auto_examples/decomposition/plot_faces_decomposition.html
    :scale: 60%
 
-.. |dict_img2| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_006.png
+.. |dict_img2| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_007.png
    :target: ../auto_examples/decomposition/plot_faces_decomposition.html
    :scale: 60%
 
 .. centered:: |pca_img2| |dict_img2|
 
-
+:math:`||.||_{\text{Fro}}` stands for the Frobenius norm and :math:`||.||_{1,1}`
+stands for the entry-wise matrix norm which is the sum of the absolute values
+of all the entries in the matrix.
 After using such a procedure to fit the dictionary, the transform is simply a
 sparse coding step that shares the same implementation with all dictionary
 learning objects (see :ref:`SparseCoder`).
@@ -458,19 +548,19 @@ different positivity constraints applied
 indicates positive values, and white represents zeros.
 
 
-.. |dict_img_pos1| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_011.png
+.. |dict_img_pos1| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_010.png
     :target: ../auto_examples/decomposition/plot_image_denoising.html
     :scale: 60%
 
-.. |dict_img_pos2| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_012.png
+.. |dict_img_pos2| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_011.png
     :target: ../auto_examples/decomposition/plot_image_denoising.html
     :scale: 60%
 
-.. |dict_img_pos3| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_013.png
+.. |dict_img_pos3| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_012.png
     :target: ../auto_examples/decomposition/plot_image_denoising.html
     :scale: 60%
 
-.. |dict_img_pos4| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_014.png
+.. |dict_img_pos4| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_013.png
     :target: ../auto_examples/decomposition/plot_image_denoising.html
     :scale: 60%
 
@@ -599,7 +689,7 @@ about these components (e.g. whether the
     :target: ../auto_examples/decomposition/plot_faces_decomposition.html
     :scale: 60%
 
-.. |fa_img3| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_009.png
+.. |fa_img3| image:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_008.png
     :target: ../auto_examples/decomposition/plot_faces_decomposition.html
     :scale: 60%
 
@@ -609,7 +699,7 @@ The main advantage for Factor Analysis o
 it can model the variance in every direction of the input space independently
 (heteroscedastic noise):
 
-.. figure:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_008.png
+.. figure:: ../auto_examples/decomposition/images/sphx_glr_plot_faces_decomposition_009.png
     :target: ../auto_examples/decomposition/plot_faces_decomposition.html
     :align: center
     :scale: 75%
@@ -622,11 +712,18 @@ of heteroscedastic noise:
     :align: center
     :scale: 75%
 
+Factor Analysis is often followed by a rotation of the factors (with the
+parameter `rotation`), usually to improve interpretability. For example,
+Varimax rotation maximizes the sum of the variances of the squared loadings,
+i.e., it tends to produce sparser factors, which are influenced by only a few
+features each (the "simple structure"). See e.g., the first example below.
 
 .. topic:: Examples:
 
+    * :ref:`sphx_glr_auto_examples_decomposition_plot_varimax_fa.py`
     * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py`
 
+
 .. _ICA:
 
 Independent component analysis (ICA)
@@ -736,25 +833,23 @@ In :class:`NMF`, L1 and L2 priors can be
 to regularize the model. The L2 prior uses the Frobenius norm, while the L1
 prior uses an elementwise L1 norm. As in :class:`ElasticNet`, we control the
 combination of L1 and L2 with the :attr:`l1_ratio` (:math:`\rho`) parameter,
-and the intensity of the regularization with the :attr:`alpha`
-(:math:`\alpha`) parameter. Then the priors terms are:
+and the intensity of the regularization with the :attr:`alpha_W` and :attr:`alpha_H`
+(:math:`\alpha_W` and :math:`\alpha_H`) parameters. The priors are scaled by the number
+of samples (:math:`n\_samples`) for `H` and the number of features (:math:`n\_features`)
+for `W` to keep their impact balanced with respect to one another and to the data fit
+term as independent as possible of the size of the training set. Then the priors terms
+are:
 
 .. math::
-    \alpha \rho ||W||_1 + \alpha \rho ||H||_1
-    + \frac{\alpha(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2
-    + \frac{\alpha(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2
+    (\alpha_W \rho ||W||_1 + \frac{\alpha_W(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2) * n\_features
+    + (\alpha_H \rho ||H||_1 + \frac{\alpha_H(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2) * n\_samples
 
 and the regularized objective function is:
 
 .. math::
     d_{\mathrm{Fro}}(X, WH)
-    + \alpha \rho ||W||_1 + \alpha \rho ||H||_1
-    + \frac{\alpha(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2
-    + \frac{\alpha(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2
-
-:class:`NMF` regularizes both W and H. The public function
-:func:`non_negative_factorization` allows a finer control through the
-:attr:`regularization` attribute, and may regularize only W, only H, or both.
+    + (\alpha_W \rho ||W||_1 + \frac{\alpha_W(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2) * n\_features
+    + (\alpha_H \rho ||H||_1 + \frac{\alpha_H(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2) * n\_samples
 
 NMF with a beta-divergence
 --------------------------
@@ -826,10 +921,33 @@ stored components::
     * :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py`
     * :ref:`sphx_glr_auto_examples_decomposition_plot_beta_divergence.py`
 
+.. _MiniBatchNMF:
+
+Mini-batch Non Negative Matrix Factorization
+--------------------------------------------
+
+:class:`MiniBatchNMF` [7]_ implements a faster, but less accurate version of the
+non-negative matrix factorization (i.e. :class:`~sklearn.decomposition.NMF`),
+better suited for large datasets.
+
+By default, :class:`MiniBatchNMF` divides the data into mini-batches and
+optimizes the NMF model in an online manner by cycling over the mini-batches
+for the specified number of iterations. The ``batch_size`` parameter controls
+the size of the batches.
+
+In order to speed up the mini-batch algorithm it is also possible to scale
+past batches, giving them less importance than newer batches. This is done
+by introducing a so-called forgetting factor controlled by the ``forget_factor``
+parameter.
+
+The estimator also implements ``partial_fit``, which updates ``H`` by iterating
+only once over a mini-batch. This can be used for online learning when the data
+is not readily available from the start, or when the data does not fit into memory.
+
 .. topic:: References:
 
     .. [1] `"Learning the parts of objects by non-negative matrix factorization"
-      <http://www.columbia.edu/~jwp2128/Teaching/E4903/papers/nmf_nature.pdf>`_
+      <http://www.cs.columbia.edu/~blei/fogm/2020F/readings/LeeSeung1999.pdf>`_
       D. Lee, S. Seung, 1999
 
     .. [2] `"Non-negative Matrix Factorization with Sparseness Constraints"
@@ -843,13 +961,16 @@ stored components::
 
     .. [5] `"Fast local algorithms for large scale nonnegative matrix and tensor
       factorizations."
-      <http://www.bsp.brain.riken.jp/publications/2009/Cichocki-Phan-IEICE_col.pdf>`_
+      <https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.214.6398&rep=rep1&type=pdf>`_
       A. Cichocki, A. Phan, 2009
 
-    .. [6] `"Algorithms for nonnegative matrix factorization with the beta-divergence"
-      <https://arxiv.org/pdf/1010.1763.pdf>`_
-      C. Fevotte, J. Idier, 2011
-
+    .. [6] :arxiv:`"Algorithms for nonnegative matrix factorization with
+           the beta-divergence" <1010.1763>`
+           C. Fevotte, J. Idier, 2011
+
+    .. [7] :arxiv:`"Online algorithms for nonnegative matrix factorization with the
+       Itakura-Saito divergence" <1106.4198>`
+       A. Lefevre, F. Bach, C. Fevotte, 2011
 
 .. _LatentDirichletAllocation:
 
@@ -865,34 +986,34 @@ The graphical model of LDA is a three-le
 .. image:: ../images/lda_model_graph.png
    :align: center
 
-Note on notations presented in the graphical model above, which can be found in 
+Note on notations presented in the graphical model above, which can be found in
 Hoffman et al. (2013):
 
   * The corpus is a collection of :math:`D` documents.
   * A document is a sequence of :math:`N` words.
-  * There are :math:`K` topics in the corpus. 
-  * The boxes represent repeated sampling. 
+  * There are :math:`K` topics in the corpus.
+  * The boxes represent repeated sampling.
 
-In the graphical model, each node is a random variable and has a role in the 
-generative process. A shaded node indicates an observed variable and an unshaded 
-node indicates a hidden (latent) variable. In this case, words in the corpus are 
-the only data that we observe. The latent variables determine the random mixture 
-of topics in the corpus and the distribution of words in the documents. 
-The goal of LDA is to use the observed words to infer the hidden topic 
-structure. 
+In the graphical model, each node is a random variable and has a role in the
+generative process. A shaded node indicates an observed variable and an unshaded
+node indicates a hidden (latent) variable. In this case, words in the corpus are
+the only data that we observe. The latent variables determine the random mixture
+of topics in the corpus and the distribution of words in the documents.
+The goal of LDA is to use the observed words to infer the hidden topic
+structure.
 
-When modeling text corpora, the model assumes the following generative process 
-for a corpus with :math:`D` documents and :math:`K` topics, with :math:`K` 
+When modeling text corpora, the model assumes the following generative process
+for a corpus with :math:`D` documents and :math:`K` topics, with :math:`K`
 corresponding to :attr:`n_components` in the API:
 
-  1. For each topic :math:`k \in K`, draw :math:`\beta_k \sim 
-     \mathrm{Dirichlet}(\eta)`. This provides a distribution over the words, 
-     i.e. the probability of a word appearing in topic :math:`k`. 
-     :math:`\eta` corresponds to :attr:`topic_word_prior`. 
-
-  2. For each document :math:`d \in D`, draw the topic proportions 
-     :math:`\theta_d \sim \mathrm{Dirichlet}(\alpha)`. :math:`\alpha` 
-     corresponds to :attr:`doc_topic_prior`. 
+  1. For each topic :math:`k \in K`, draw :math:`\beta_k \sim
+     \mathrm{Dirichlet}(\eta)`. This provides a distribution over the words,
+     i.e. the probability of a word appearing in topic :math:`k`.
+     :math:`\eta` corresponds to :attr:`topic_word_prior`.
+
+  2. For each document :math:`d \in D`, draw the topic proportions
+     :math:`\theta_d \sim \mathrm{Dirichlet}(\alpha)`. :math:`\alpha`
+     corresponds to :attr:`doc_topic_prior`.
 
   3. For each word :math:`i` in document :math:`d`:
 
@@ -909,8 +1030,8 @@ For parameter estimation, the posterior
 
 Since the posterior is intractable, variational Bayesian method
 uses a simpler distribution :math:`q(z,\theta,\beta | \lambda, \phi, \gamma)`
-to approximate it, and those variational parameters :math:`\lambda`, 
-:math:`\phi`, :math:`\gamma` are optimized to maximize the Evidence 
+to approximate it, and those variational parameters :math:`\lambda`,
+:math:`\phi`, :math:`\gamma` are optimized to maximize the Evidence
 Lower Bound (ELBO):
 
 .. math::
@@ -921,10 +1042,10 @@ Maximizing ELBO is equivalent to minimiz
 between :math:`q(z,\theta,\beta)` and the true posterior
 :math:`p(z, \theta, \beta |w, \alpha, \eta)`.
 
-:class:`LatentDirichletAllocation` implements the online variational Bayes 
+:class:`LatentDirichletAllocation` implements the online variational Bayes
 algorithm and supports both online and batch update methods.
-While the batch method updates variational variables after each full pass through 
-the data, the online method updates variational variables from mini-batch data 
+While the batch method updates variational variables after each full pass through
+the data, the online method updates variational variables from mini-batch data
 points.
 
 .. note::
@@ -959,6 +1080,9 @@ when data can be fetched sequentially.
       <http://www.columbia.edu/~jwp2128/Papers/HoffmanBleiWangPaisley2013.pdf>`_
       M. Hoffman, D. Blei, C. Wang, J. Paisley, 2013
 
+    * `"The varimax criterion for analytic rotation in factor analysis"
+      <https://link.springer.com/article/10.1007%2FBF02289233>`_
+      H. F. Kaiser, 1958
 
 See also :ref:`nca_dim_reduction` for dimensionality reduction with
 Neighborhood Components Analysis.
diff -pruN 0.23.2-5/doc/modules/density.rst 1.1.1-1/doc/modules/density.rst
--- 0.23.2-5/doc/modules/density.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/density.rst	2022-05-19 12:16:26.448782400 +0000
@@ -8,9 +8,9 @@ Density Estimation
 Density estimation walks the line between unsupervised learning, feature
 engineering, and data modeling.  Some of the most popular and useful
 density estimation techniques are mixture models such as
-Gaussian Mixtures (:class:`sklearn.mixture.GaussianMixture`), and
+Gaussian Mixtures (:class:`~sklearn.mixture.GaussianMixture`), and
 neighbor-based approaches such as the kernel density estimate
-(:class:`sklearn.neighbors.KernelDensity`).
+(:class:`~sklearn.neighbors.KernelDensity`).
 Gaussian Mixtures are discussed more fully in the context of
 :ref:`clustering <clustering>`, because the technique is also useful as
 an unsupervised clustering scheme.
@@ -58,7 +58,7 @@ distribution of points.
 Kernel Density Estimation
 =========================
 Kernel density estimation in scikit-learn is implemented in the
-:class:`sklearn.neighbors.KernelDensity` estimator, which uses the
+:class:`~sklearn.neighbors.KernelDensity` estimator, which uses the
 Ball Tree or KD Tree for efficient queries (see :ref:`neighbors` for
 a discussion of these).  Though the above example
 uses a 1D data set for simplicity, kernel density estimation can be
@@ -100,7 +100,7 @@ between bias and variance in the result.
 smooth (i.e. high-bias) density distribution.  A small bandwidth leads
 to an unsmooth (i.e. high-variance) density distribution.
 
-:class:`sklearn.neighbors.KernelDensity` implements several common kernel
+:class:`~sklearn.neighbors.KernelDensity` implements several common kernel
 forms, which are shown in the following figure:
 
 .. |kde_kernels| image:: ../auto_examples/neighbors/images/sphx_glr_plot_kde_1d_002.png
@@ -136,9 +136,9 @@ The form of these kernels is as follows:
   :math:`K(x; h) \propto \cos(\frac{\pi x}{2h})` if :math:`x < h`
 
 The kernel density estimator can be used with any of the valid distance
-metrics (see :class:`sklearn.neighbors.DistanceMetric` for a list of available metrics), though
-the results are properly normalized only for the Euclidean metric.  One
-particularly useful metric is the
+metrics (see :class:`~sklearn.metrics.DistanceMetric` for a list of
+available metrics), though the results are properly normalized only
+for the Euclidean metric.  One particularly useful metric is the
 `Haversine distance <https://en.wikipedia.org/wiki/Haversine_formula>`_
 which measures the angular distance between points on a sphere.  Here
 is an example of using a kernel density estimate for a visualization
diff -pruN 0.23.2-5/doc/modules/ensemble.rst 1.1.1-1/doc/modules/ensemble.rst
--- 0.23.2-5/doc/modules/ensemble.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/ensemble.rst	2022-05-19 12:16:26.448782400 +0000
@@ -110,9 +110,9 @@ construction.  The prediction of the ens
 prediction of the individual classifiers.
 
 As other classifiers, forest classifiers have to be fitted with two
-arrays: a sparse or dense array X of size ``[n_samples, n_features]`` holding the
-training samples, and an array Y of size ``[n_samples]`` holding the
-target values (class labels) for the training samples::
+arrays: a sparse or dense array X of shape ``(n_samples, n_features)``
+holding the training samples, and an array Y of shape ``(n_samples,)``
+holding the target values (class labels) for the training samples::
 
     >>> from sklearn.ensemble import RandomForestClassifier
     >>> X = [[0, 0], [1, 1]]
@@ -120,9 +120,9 @@ target values (class labels) for the tra
     >>> clf = RandomForestClassifier(n_estimators=10)
     >>> clf = clf.fit(X, Y)
 
-Like :ref:`decision trees <tree>`, forests of trees also extend
-to :ref:`multi-output problems <tree_multioutput>`  (if Y is an array of size
-``[n_samples, n_outputs]``).
+Like :ref:`decision trees <tree>`, forests of trees also extend to
+:ref:`multi-output problems <tree_multioutput>`  (if Y is an array
+of shape ``(n_samples, n_outputs)``).
 
 Random Forests
 --------------
@@ -207,19 +207,22 @@ results will stop getting significantly
 trees. The latter is the size of the random subsets of features to consider
 when splitting a node. The lower the greater the reduction of variance, but
 also the greater the increase in bias. Empirical good default values are
-``max_features=None`` (always considering all features instead of a random
-subset) for regression problems, and ``max_features="sqrt"`` (using a random
-subset of size ``sqrt(n_features)``) for classification tasks (where
-``n_features`` is the number of features in the data). Good results are often
-achieved when setting ``max_depth=None`` in combination with
-``min_samples_split=2`` (i.e., when fully developing the trees). Bear in mind
-though that these values are usually not optimal, and might result in models
-that consume a lot of RAM. The best parameter values should always be
-cross-validated. In addition, note that in random forests, bootstrap samples
-are used by default (``bootstrap=True``) while the default strategy for
-extra-trees is to use the whole dataset (``bootstrap=False``). When using
-bootstrap sampling the generalization accuracy can be estimated on the left out
-or out-of-bag samples. This can be enabled by setting ``oob_score=True``.
+``max_features=1.0`` or equivalently ``max_features=None`` (always considering
+all features instead of a random subset) for regression problems, and
+``max_features="sqrt"`` (using a random subset of size ``sqrt(n_features)``)
+for classification tasks (where ``n_features`` is the number of features in
+the data). The default value of ``max_features=1.0`` is equivalent to bagged
+trees and more randomness can be achieved by setting smaller values (e.g. 0.3
+is a typical default in the literature). Good results are often achieved when
+setting ``max_depth=None`` in combination with ``min_samples_split=2`` (i.e.,
+when fully developing the trees). Bear in mind though that these values are
+usually not optimal, and might result in models that consume a lot of RAM.
+The best parameter values should always be cross-validated. In addition, note
+that in random forests, bootstrap samples are used by default
+(``bootstrap=True``) while the default strategy for extra-trees is to use the
+whole dataset (``bootstrap=False``). When using bootstrap sampling the
+generalization error can be estimated on the left out or out-of-bag samples.
+This can be enabled by setting ``oob_score=True``.
 
 .. note::
 
@@ -455,10 +458,9 @@ Gradient Tree Boosting
 
 `Gradient Tree Boosting <https://en.wikipedia.org/wiki/Gradient_boosting>`_
 or Gradient Boosted Decision Trees (GBDT) is a generalization
-of boosting to arbitrary
-differentiable loss functions. GBDT is an accurate and effective
-off-the-shelf procedure that can be used for both regression and
-classification problems in a
+of boosting to arbitrary differentiable loss functions, see the seminal work of
+[Friedman2001]_. GBDT is an accurate and effective off-the-shelf procedure that can be
+used for both regression and classification problems in a
 variety of areas including Web search ranking and ecology.
 
 The module :mod:`sklearn.ensemble` provides methods
@@ -467,7 +469,7 @@ trees.
 
 .. note::
 
-  Scikit-learn 0.21 introduces two new experimental implementations of
+  Scikit-learn 0.21 introduces two new implementations of
   gradient boosting trees, namely :class:`HistGradientBoostingClassifier`
   and :class:`HistGradientBoostingRegressor`, inspired by
   `LightGBM <https://github.com/Microsoft/LightGBM>`__ (See [LightGBM]_).
@@ -537,7 +539,8 @@ Regression
 :class:`GradientBoostingRegressor` supports a number of
 :ref:`different loss functions <gradient_boosting_loss>`
 for regression which can be specified via the argument
-``loss``; the default loss function for regression is least squares (``'ls'``).
+``loss``; the default loss function for regression is squared error
+(``'squared_error'``).
 
 ::
 
@@ -549,14 +552,16 @@ for regression which can be specified vi
     >>> X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0)
     >>> X_train, X_test = X[:200], X[200:]
     >>> y_train, y_test = y[:200], y[200:]
-    >>> est = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
-    ...     max_depth=1, random_state=0, loss='ls').fit(X_train, y_train)
+    >>> est = GradientBoostingRegressor(
+    ...     n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0,
+    ...     loss='squared_error'
+    ... ).fit(X_train, y_train)
     >>> mean_squared_error(y_test, est.predict(X_test))
     5.00...
 
 The figure below shows the results of applying :class:`GradientBoostingRegressor`
-with least squares loss and 500 base learners to the Boston house price dataset
-(:func:`sklearn.datasets.load_boston`).
+with least squares loss and 500 base learners to the diabetes dataset
+(:func:`sklearn.datasets.load_diabetes`).
 The plot on the left shows the train and test error at each iteration.
 The train error at each iteration is stored in the
 :attr:`~GradientBoostingRegressor.train_score_` attribute
@@ -564,8 +569,6 @@ of the gradient boosting model. The test
 via the :meth:`~GradientBoostingRegressor.staged_predict` method which returns a
 generator that yields the predictions at each stage. Plots like these can be used
 to determine the optimal number of trees (i.e. ``n_estimators``) by early stopping.
-The plot on the right shows the impurity-based feature importances which can be
-obtained via the ``feature_importances_`` property.
 
 .. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_gradient_boosting_regression_001.png
    :target: ../auto_examples/ensemble/plot_gradient_boosting_regression.html
@@ -619,7 +622,7 @@ We found that ``max_leaf_nodes=k`` gives
 but is significantly faster to train at the expense of a slightly higher
 training error.
 The parameter ``max_leaf_nodes`` corresponds to the variable ``J`` in the
-chapter on gradient boosting in [F2001]_ and is related to the parameter
+chapter on gradient boosting in [Friedman2001]_ and is related to the parameter
 ``interaction.depth`` in R's gbm package where ``max_leaf_nodes == interaction.depth + 1`` .
 
 Mathematical formulation
@@ -631,12 +634,12 @@ case.
 Regression
 ^^^^^^^^^^
 
-GBRT regressors are additive models whose prediction :math:`y_i` for a
+GBRT regressors are additive models whose prediction :math:`\hat{y}_i` for a
 given input :math:`x_i` is of the following form:
 
   .. math::
 
-    \hat{y_i} = F_M(x_i) = \sum_{m=1}^{M} h_m(x_i)
+    \hat{y}_i = F_M(x_i) = \sum_{m=1}^{M} h_m(x_i)
 
 where the :math:`h_m` are estimators called *weak learners* in the context
 of boosting. Gradient Tree Boosting uses :ref:`decision tree regressors
@@ -720,9 +723,9 @@ homogeneous to a prediction: it cannot b
 continuous values.
 
 The mapping from the value :math:`F_M(x_i)` to a class or a probability is
-loss-dependent. For the deviance (or log-loss), the probability that
+loss-dependent. For the log-loss, the probability that
 :math:`x_i` belongs to the positive class is modeled as :math:`p(y_i = 1 |
-x_i) = \sigma(F_M(x_i))` where :math:`\sigma` is the sigmoid function.
+x_i) = \sigma(F_M(x_i))` where :math:`\sigma` is the sigmoid or expit function.
 
 For multiclass classification, K trees (for K classes) are built at each of
 the :math:`M` iterations. The probability that :math:`x_i` belongs to class
@@ -743,15 +746,15 @@ the parameter ``loss``:
 
   * Regression
 
-    * Least squares (``'ls'``): The natural choice for regression due
-      to its superior computational properties. The initial model is
+    * Squared error (``'squared_error'``): The natural choice for regression
+      due to its superior computational properties. The initial model is
       given by the mean of the target values.
     * Least absolute deviation (``'lad'``): A robust loss function for
       regression. The initial model is given by the median of the
       target values.
     * Huber (``'huber'``): Another robust loss function that combines
       least squares and least absolute deviation; use ``alpha`` to
-      control the sensitivity with regards to outliers (see [F2001]_ for
+      control the sensitivity with regards to outliers (see [Friedman2001]_ for
       more details).
     * Quantile (``'quantile'``): A loss function for quantile regression.
       Use ``0 < alpha < 1`` to specify the quantile. This loss function
@@ -760,12 +763,12 @@ the parameter ``loss``:
 
   * Classification
 
-    * Binomial deviance (``'deviance'``): The negative binomial
-      log-likelihood loss function for binary classification (provides
-      probability estimates).  The initial model is given by the
+    * Binary log-loss (``'log-loss'``): The binomial
+      negative log-likelihood loss function for binary classification. It provides
+      probability estimates.  The initial model is given by the
       log odds-ratio.
-    * Multinomial deviance (``'deviance'``): The negative multinomial
-      log-likelihood loss function for multi-class classification with
+    * Multi-class log-loss (``'log-loss'``): The multinomial
+      negative log-likelihood loss function for multi-class classification with
       ``n_classes`` mutually exclusive classes. It provides
       probability estimates.  The initial model is given by the
       prior probability of each class. At each iteration ``n_classes``
@@ -773,7 +776,7 @@ the parameter ``loss``:
       inefficient for data sets with a large number of classes.
     * Exponential loss (``'exponential'``): The same loss function
       as :class:`AdaBoostClassifier`. Less robust to mislabeled
-      examples than ``'deviance'``; can only be used for binary
+      examples than ``'log-loss'``; can only be used for binary
       classification.
 
 .. _gradient_boosting_shrinkage:
@@ -781,7 +784,7 @@ the parameter ``loss``:
 Shrinkage via learning rate
 ---------------------------
 
-[F2001]_ proposed a simple regularization strategy that scales
+[Friedman2001]_ proposed a simple regularization strategy that scales
 the contribution of each weak learner by a constant factor :math:`\nu`:
 
 .. math::
@@ -805,7 +808,7 @@ stopping. For a more detailed discussion
 Subsampling
 -----------
 
-[F1999]_ proposed stochastic gradient boosting, which combines gradient
+[Friedman2002]_ proposed stochastic gradient boosting, which combines gradient
 boosting with bootstrap averaging (bagging). At each iteration
 the base classifier is trained on a fraction ``subsample`` of
 the available training data. The subsample is drawn without replacement.
@@ -892,12 +895,25 @@ based on permutation of the features.
 
  * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py`
 
+.. topic:: References
+
+  .. [Friedman2001] Friedman, J.H. (2001). :doi:`Greedy function approximation: A gradient
+      boosting machine <10.1214/aos/1013203451>`.
+      Annals of Statistics, 29, 1189-1232.
+
+  .. [Friedman2002] Friedman, J.H. (2002). `Stochastic gradient boosting.
+     <https://statweb.stanford.edu/~jhf/ftp/stobst.pdf>`_.
+     Computational Statistics & Data Analysis, 38, 367-378.
+
+  .. [R2007] G. Ridgeway (2006). `Generalized Boosted Models: A guide to the gbm
+     package <https://cran.r-project.org/web/packages/gbm/vignettes/gbm.pdf>`_
+
 .. _histogram_based_gradient_boosting:
 
 Histogram-Based Gradient Boosting
 =================================
 
-Scikit-learn 0.21 introduced two new experimental implementations of
+Scikit-learn 0.21 introduced two new implementations of
 gradient boosting trees, namely :class:`HistGradientBoostingClassifier`
 and :class:`HistGradientBoostingRegressor`, inspired by
 `LightGBM <https://github.com/Microsoft/LightGBM>`__ (See [LightGBM]_).
@@ -919,15 +935,6 @@ estimators is slightly different, and so
 :class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`
 are not yet supported, for instance some loss functions.
 
-These estimators are still **experimental**: their predictions
-and their API might change without any deprecation cycle. To use them, you
-need to explicitly import ``enable_hist_gradient_boosting``::
-
-  >>> # explicitly require this experimental feature
-  >>> from sklearn.experimental import enable_hist_gradient_boosting  # noqa
-  >>> # now you can import normally from ensemble
-  >>> from sklearn.ensemble import HistGradientBoostingClassifier
-
 .. topic:: Examples:
 
  * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py`
@@ -940,7 +947,6 @@ Most of the parameters are unchanged fro
 One exception is the ``max_iter`` parameter that replaces ``n_estimators``, and
 controls the number of iterations of the boosting process::
 
-  >>> from sklearn.experimental import enable_hist_gradient_boosting
   >>> from sklearn.ensemble import HistGradientBoostingClassifier
   >>> from sklearn.datasets import make_hastie_10_2
 
@@ -952,13 +958,14 @@ controls the number of iterations of the
   >>> clf.score(X_test, y_test)
   0.8965
 
-Available losses for regression are 'least_squares',
-'least_absolute_deviation', which is less sensitive to outliers, and
+Available losses for regression are 'squared_error',
+'absolute_error', which is less sensitive to outliers, and
 'poisson', which is well suited to model counts and frequencies. For
-classification, 'binary_crossentropy' is used for binary classification and
-'categorical_crossentropy' is used for multiclass classification. By default
-the loss is 'auto' and will select the appropriate loss depending on
-:term:`y` passed to :term:`fit`.
+classification, 'log_loss' is the only option. For binary classification it uses the
+binary log loss, also known as binomial deviance or binary cross-entropy. For
+`n_classes >= 3`, it uses the multi-class log loss function, with multinomial deviance
+and categorical cross-entropy as alternative names. The appropriate loss version is
+selected based on :term:`y` passed to :term:`fit`.
 
 The size of the trees can be controlled through the ``max_leaf_nodes``,
 ``max_depth``, and ``min_samples_leaf`` parameters.
@@ -991,7 +998,6 @@ with missing values should go to the lef
 potential gain. When predicting, samples with missing values are assigned to
 the left or right child consequently::
 
-  >>> from sklearn.experimental import enable_hist_gradient_boosting  # noqa
   >>> from sklearn.ensemble import HistGradientBoostingClassifier
   >>> import numpy as np
 
@@ -1053,6 +1059,68 @@ multiplying the gradients (and the hessi
 the binning stage (specifically the quantiles computation) does not take the
 weights into account.
 
+.. _categorical_support_gbdt:
+
+Categorical Features Support
+----------------------------
+
+:class:`HistGradientBoostingClassifier` and
+:class:`HistGradientBoostingRegressor` have native support for categorical
+features: they can consider splits on non-ordered, categorical data.
+
+For datasets with categorical features, using the native categorical support
+is often better than relying on one-hot encoding
+(:class:`~sklearn.preprocessing.OneHotEncoder`), because one-hot encoding
+requires more tree depth to achieve equivalent splits. It is also usually
+better to rely on the native categorical support rather than to treat
+categorical features as continuous (ordinal), which happens for ordinal-encoded
+categorical data, since categories are nominal quantities where order does not
+matter.
+
+To enable categorical support, a boolean mask can be passed to the
+`categorical_features` parameter, indicating which feature is categorical. In
+the following, the first feature will be treated as categorical and the
+second feature as numerical::
+
+  >>> gbdt = HistGradientBoostingClassifier(categorical_features=[True, False])
+
+Equivalently, one can pass a list of integers indicating the indices of the
+categorical features::
+
+  >>> gbdt = HistGradientBoostingClassifier(categorical_features=[0])
+
+The cardinality of each categorical feature should be less than the `max_bins`
+parameter, and each categorical feature is expected to be encoded in
+`[0, max_bins - 1]`. To that end, it might be useful to pre-process the data
+with an :class:`~sklearn.preprocessing.OrdinalEncoder` as done in
+:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`.
+
+If there are missing values during training, the missing values will be
+treated as a proper category. If there are no missing values during training,
+then at prediction time, missing values are mapped to the child node that has
+the most samples (just like for continuous features). When predicting,
+categories that were not seen during fit time will be treated as missing
+values.
+
+**Split finding with categorical features**: The canonical way of considering
+categorical splits in a tree is to consider
+all of the :math:`2^{K - 1} - 1` partitions, where :math:`K` is the number of
+categories. This can quickly become prohibitive when :math:`K` is large.
+Fortunately, since gradient boosting trees are always regression trees (even
+for classification problems), there exists a faster strategy that can yield
+equivalent splits. First, the categories of a feature are sorted according to
+the variance of the target, for each category `k`. Once the categories are
+sorted, one can consider *continuous partitions*, i.e. treat the categories
+as if they were ordered continuous values (see Fisher [Fisher1958]_ for a
+formal proof). As a result, only :math:`K - 1` splits need to be considered
+instead of :math:`2^{K - 1} - 1`. The initial sorting is a
+:math:`\mathcal{O}(K \log(K))` operation, leading to a total complexity of
+:math:`\mathcal{O}(K \log(K) + K)`, instead of :math:`\mathcal{O}(2^K)`.
+
+.. topic:: Examples:
+
+  * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`
+
 .. _monotonic_cst_gbdt:
 
 Monotonic Constraints
@@ -1083,7 +1151,6 @@ You can specify a monotonic constraint o
 constraint, while -1 and 1 indicate a negative and positive constraint,
 respectively::
 
-  >>> from sklearn.experimental import enable_hist_gradient_boosting  # noqa
   >>> from sklearn.ensemble import HistGradientBoostingRegressor
 
   ... # positive, negative, and no constraint on the 3 features
@@ -1094,6 +1161,10 @@ that the feature is supposed to have a p
 probability to belong to the positive class. Monotonic constraints are not
 supported for multiclass context.
 
+.. note::
+    Since categories are unordered quantities, it is not possible to enforce
+    monotonic constraints on categorical features.
+
 .. topic:: Examples:
 
   * :ref:`sphx_glr_auto_examples_ensemble_plot_monotonic_constraints.py`
@@ -1151,16 +1222,17 @@ Finally, many parts of the implementatio
 
 .. topic:: References
 
-  .. [F1999] Friedmann, Jerome H., 2007, `"Stochastic Gradient Boosting"
-     <https://statweb.stanford.edu/~jhf/ftp/stobst.pdf>`_
-  .. [R2007] G. Ridgeway, "Generalized Boosted Models: A guide to the gbm
-     package", 2007
-  .. [XGBoost] Tianqi Chen, Carlos Guestrin, `"XGBoost: A Scalable Tree
-     Boosting System" <https://arxiv.org/abs/1603.02754>`_
+  .. [XGBoost] Tianqi Chen, Carlos Guestrin, :arxiv:`"XGBoost: A Scalable Tree
+     Boosting System" <1603.02754>`
+
   .. [LightGBM] Ke et. al. `"LightGBM: A Highly Efficient Gradient
      BoostingDecision Tree" <https://papers.nips.cc/paper/
      6907-lightgbm-a-highly-efficient-gradient-boosting-decision-tree>`_
 
+  .. [Fisher1958] Fisher, W.D. (1958). `"On Grouping for Maximum Homogeneity"
+     <http://csiss.ncgia.ucsb.edu/SPACE/workshops/2004/SAC/files/fisher.pdf>`_
+     Journal of the American Statistical Association, 53, 789-798.
+
 .. _voting_classifier:
 
 Voting Classifier
@@ -1348,18 +1420,18 @@ Usage
 
 The following example shows how to fit the VotingRegressor::
 
-   >>> from sklearn.datasets import load_boston
+   >>> from sklearn.datasets import load_diabetes
    >>> from sklearn.ensemble import GradientBoostingRegressor
    >>> from sklearn.ensemble import RandomForestRegressor
    >>> from sklearn.linear_model import LinearRegression
    >>> from sklearn.ensemble import VotingRegressor
 
    >>> # Loading some example data
-   >>> X, y = load_boston(return_X_y=True)
+   >>> X, y = load_diabetes(return_X_y=True)
 
    >>> # Training classifiers
-   >>> reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
-   >>> reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
+   >>> reg1 = GradientBoostingRegressor(random_state=1)
+   >>> reg2 = RandomForestRegressor(random_state=1)
    >>> reg3 = LinearRegression()
    >>> ereg = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2), ('lr', reg3)])
    >>> ereg = ereg.fit(X, y)
@@ -1392,10 +1464,11 @@ are stacked together in parallel on the
 list of names and estimators::
 
   >>> from sklearn.linear_model import RidgeCV, LassoCV
-  >>> from sklearn.svm import SVR
+  >>> from sklearn.neighbors import KNeighborsRegressor
   >>> estimators = [('ridge', RidgeCV()),
   ...               ('lasso', LassoCV(random_state=42)),
-  ...               ('svr', SVR(C=1, gamma=1e-6))]
+  ...               ('knr', KNeighborsRegressor(n_neighbors=20,
+  ...                                           metric='euclidean'))]
 
 The `final_estimator` will use the predictions of the `estimators` as input. It
 needs to be a classifier or a regressor when using :class:`StackingClassifier`
@@ -1403,15 +1476,18 @@ or :class:`StackingRegressor`, respectiv
 
   >>> from sklearn.ensemble import GradientBoostingRegressor
   >>> from sklearn.ensemble import StackingRegressor
+  >>> final_estimator = GradientBoostingRegressor(
+  ...     n_estimators=25, subsample=0.5, min_samples_leaf=25, max_features=1,
+  ...     random_state=42)
   >>> reg = StackingRegressor(
   ...     estimators=estimators,
-  ...     final_estimator=GradientBoostingRegressor(random_state=42))
+  ...     final_estimator=final_estimator)
 
 To train the `estimators` and `final_estimator`, the `fit` method needs
 to be called on the training data::
 
-  >>> from sklearn.datasets import load_boston
-  >>> X, y = load_boston(return_X_y=True)
+  >>> from sklearn.datasets import load_diabetes
+  >>> X, y = load_diabetes(return_X_y=True)
   >>> from sklearn.model_selection import train_test_split
   >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
   ...                                                     random_state=42)
@@ -1437,21 +1513,21 @@ any other regressor or classifier, expos
    >>> y_pred = reg.predict(X_test)
    >>> from sklearn.metrics import r2_score
    >>> print('R2 score: {:.2f}'.format(r2_score(y_test, y_pred)))
-   R2 score: 0.81
+   R2 score: 0.53
 
 Note that it is also possible to get the output of the stacked
 `estimators` using the `transform` method::
 
   >>> reg.transform(X_test[:5])
-  array([[28.78..., 28.43...  , 22.62...],
-         [35.96..., 32.58..., 23.68...],
-         [14.97..., 14.05..., 16.45...],
-         [25.19..., 25.54..., 22.92...],
-         [18.93..., 19.26..., 17.03... ]])
-
-In practise, a stacking predictor predict as good as the best predictor of the
-base layer and even sometimes outputperform it by combining the different
-strength of the these predictors. However, training a stacking predictor is
+  array([[142..., 138..., 146...],
+         [179..., 182..., 151...],
+         [139..., 132..., 158...],
+         [286..., 292..., 225...],
+         [126..., 124..., 164...]])
+
+In practice, a stacking predictor predicts as well as the best predictor of the
+base layer and even sometimes outperforms it by combining the different
+strengths of these predictors. However, training a stacking predictor is
 computationally expensive.
 
 .. note::
@@ -1464,22 +1540,27 @@ computationally expensive.
    Multiple stacking layers can be achieved by assigning `final_estimator` to
    a :class:`StackingClassifier` or :class:`StackingRegressor`::
 
+    >>> final_layer_rfr = RandomForestRegressor(
+    ...     n_estimators=10, max_features=1, max_leaf_nodes=5,random_state=42)
+    >>> final_layer_gbr = GradientBoostingRegressor(
+    ...     n_estimators=10, max_features=1, max_leaf_nodes=5,random_state=42)
     >>> final_layer = StackingRegressor(
-    ...     estimators=[('rf', RandomForestRegressor(random_state=42)),
-    ...                 ('gbrt', GradientBoostingRegressor(random_state=42))],
+    ...     estimators=[('rf', final_layer_rfr),
+    ...                 ('gbrt', final_layer_gbr)],
     ...     final_estimator=RidgeCV()
     ...     )
     >>> multi_layer_regressor = StackingRegressor(
     ...     estimators=[('ridge', RidgeCV()),
     ...                 ('lasso', LassoCV(random_state=42)),
-    ...                 ('svr', SVR(C=1, gamma=1e-6, kernel='rbf'))],
+    ...                 ('knr', KNeighborsRegressor(n_neighbors=20,
+    ...                                             metric='euclidean'))],
     ...     final_estimator=final_layer
     ... )
     >>> multi_layer_regressor.fit(X_train, y_train)
     StackingRegressor(...)
     >>> print('R2 score: {:.2f}'
     ...       .format(multi_layer_regressor.score(X_test, y_test)))
-    R2 score: 0.83
+    R2 score: 0.53
 
 .. topic:: References
 
diff -pruN 0.23.2-5/doc/modules/feature_extraction.rst 1.1.1-1/doc/modules/feature_extraction.rst
--- 0.23.2-5/doc/modules/feature_extraction.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/feature_extraction.rst	2022-05-19 12:16:26.452782900 +0000
@@ -1,4 +1,4 @@
-﻿.. _feature_extraction:
+.. _feature_extraction:
 
 ==================
 Feature extraction
@@ -53,8 +53,28 @@ is a traditional numerical feature::
          [ 0.,  1.,  0., 12.],
          [ 0.,  0.,  1., 18.]])
 
-  >>> vec.get_feature_names()
-  ['city=Dubai', 'city=London', 'city=San Francisco', 'temperature']
+  >>> vec.get_feature_names_out()
+  array(['city=Dubai', 'city=London', 'city=San Francisco', 'temperature'], ...)
+
+:class:`DictVectorizer` accepts multiple string values for one
+feature, like, e.g., multiple categories for a movie.
+
+Assume a database classifies each movie using some categories (not mandatory)
+and its year of release.
+
+    >>> movie_entry = [{'category': ['thriller', 'drama'], 'year': 2003},
+    ...                {'category': ['animation', 'family'], 'year': 2011},
+    ...                {'year': 1974}]
+    >>> vec.fit_transform(movie_entry).toarray()
+    array([[0.000e+00, 1.000e+00, 0.000e+00, 1.000e+00, 2.003e+03],
+           [1.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 2.011e+03],
+           [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.974e+03]])
+    >>> vec.get_feature_names_out()
+    array(['category=animation', 'category=drama', 'category=family',
+           'category=thriller', 'year'], ...)
+    >>> vec.transform({'category': ['thriller'],
+    ...                'unseen_feature': '3'}).toarray()
+    array([[0., 0., 0., 1., 0.]])
 
 :class:`DictVectorizer` is also a useful representation transformation
 for training sequence classifiers in Natural Language Processing models
@@ -81,7 +101,7 @@ such a window of features extracted arou
 
 This description can be vectorized into a sparse two-dimensional matrix
 suitable for feeding into a classifier (maybe after being piped into a
-:class:`text.TfidfTransformer` for normalization)::
+:class:`~text.TfidfTransformer` for normalization)::
 
   >>> vec = DictVectorizer()
   >>> pos_vectorized = vec.fit_transform(pos_window)
@@ -90,8 +110,9 @@ suitable for feeding into a classifier (
       with 6 stored elements in Compressed Sparse ... format>
   >>> pos_vectorized.toarray()
   array([[1., 1., 1., 1., 1., 1.]])
-  >>> vec.get_feature_names()
-  ['pos+1=PP', 'pos-1=NN', 'pos-2=DT', 'word+1=on', 'word-1=cat', 'word-2=the']
+  >>> vec.get_feature_names_out()
+  array(['pos+1=PP', 'pos-1=NN', 'pos-2=DT', 'word+1=on', 'word-1=cat',
+         'word-2=the'], ...)
 
 As you can imagine, if one extracts such a context around each individual
 word of a corpus of documents the resulting matrix will be very wide
@@ -129,8 +150,8 @@ and the expected mean of any output feat
 is enabled by default with ``alternate_sign=True`` and is particularly useful
 for small hash table sizes (``n_features < 10000``). For large hash table
 sizes, it can be disabled, to allow the output to be passed to estimators like
-:class:`sklearn.naive_bayes.MultinomialNB` or
-:class:`sklearn.feature_selection.chi2`
+:class:`~sklearn.naive_bayes.MultinomialNB` or
+:class:`~sklearn.feature_selection.chi2`
 feature selectors that expect non-negative inputs.
 
 :class:`FeatureHasher` accepts either mappings
@@ -148,7 +169,7 @@ The output from :class:`FeatureHasher` i
 in the CSR format.
 
 Feature hashing can be employed in document classification,
-but unlike :class:`text.CountVectorizer`,
+but unlike :class:`~text.CountVectorizer`,
 :class:`FeatureHasher` does not do word
 splitting or any other preprocessing except Unicode-to-UTF-8 encoding;
 see :ref:`hashing_vectorizer`, below, for a combined tokenizer/hasher.
@@ -319,10 +340,9 @@ Each term found by the analyzer during t
 integer index corresponding to a column in the resulting matrix. This
 interpretation of the columns can be retrieved as follows::
 
-  >>> vectorizer.get_feature_names() == (
-  ...     ['and', 'document', 'first', 'is', 'one',
-  ...      'second', 'the', 'third', 'this'])
-  True
+  >>> vectorizer.get_feature_names_out()
+  array(['and', 'document', 'first', 'is', 'one', 'second', 'the',
+         'third', 'this'], ...)
 
   >>> X.toarray()
   array([[0, 1, 1, 1, 0, 0, 1, 0, 1],
@@ -385,8 +405,8 @@ however, similar words are useful for pr
 writing style or personality.
 
 There are several known issues in our provided 'english' stop word list. It
-does not aim to be a general, 'one-size-fits-all' solution as some tasks 
-may require a more custom solution. See [NQY18]_ for more details. 
+does not aim to be a general, 'one-size-fits-all' solution as some tasks
+may require a more custom solution. See [NQY18]_ for more details.
 
 Please take care in choosing a stop word list.
 Popular stop word lists may include words that are highly informative to
@@ -721,9 +741,8 @@ decide better::
 
   >>> ngram_vectorizer = CountVectorizer(analyzer='char_wb', ngram_range=(2, 2))
   >>> counts = ngram_vectorizer.fit_transform(['words', 'wprds'])
-  >>> ngram_vectorizer.get_feature_names() == (
-  ...     [' w', 'ds', 'or', 'pr', 'rd', 's ', 'wo', 'wp'])
-  True
+  >>> ngram_vectorizer.get_feature_names_out()
+  array([' w', 'ds', 'or', 'pr', 'rd', 's ', 'wo', 'wp'], ...)
   >>> counts.toarray().astype(int)
   array([[1, 1, 1, 0, 1, 1, 1, 0],
          [1, 1, 0, 1, 1, 1, 0, 1]])
@@ -737,17 +756,15 @@ span across words::
   >>> ngram_vectorizer.fit_transform(['jumpy fox'])
   <1x4 sparse matrix of type '<... 'numpy.int64'>'
      with 4 stored elements in Compressed Sparse ... format>
-  >>> ngram_vectorizer.get_feature_names() == (
-  ...     [' fox ', ' jump', 'jumpy', 'umpy '])
-  True
+  >>> ngram_vectorizer.get_feature_names_out()
+  array([' fox ', ' jump', 'jumpy', 'umpy '], ...)
 
   >>> ngram_vectorizer = CountVectorizer(analyzer='char', ngram_range=(5, 5))
   >>> ngram_vectorizer.fit_transform(['jumpy fox'])
   <1x5 sparse matrix of type '<... 'numpy.int64'>'
       with 5 stored elements in Compressed Sparse ... format>
-  >>> ngram_vectorizer.get_feature_names() == (
-  ...     ['jumpy', 'mpy f', 'py fo', 'umpy ', 'y fox'])
-  True
+  >>> ngram_vectorizer.get_feature_names_out()
+  array(['jumpy', 'mpy f', 'py fo', 'umpy ', 'y fox'], ...)
 
 The word boundaries-aware variant ``char_wb`` is especially interesting
 for languages that use white-spaces for word separation as it generates
@@ -800,7 +817,7 @@ datasets**:
 
 It is possible to overcome those limitations by combining the "hashing trick"
 (:ref:`Feature_hashing`) implemented by the
-:class:`sklearn.feature_extraction.FeatureHasher` class and the text
+:class:`~sklearn.feature_extraction.FeatureHasher` class and the text
 preprocessing and tokenization features of the :class:`CountVectorizer`.
 
 This combination is implementing in :class:`HashingVectorizer`,
diff -pruN 0.23.2-5/doc/modules/feature_selection.rst 1.1.1-1/doc/modules/feature_selection.rst
--- 0.23.2-5/doc/modules/feature_selection.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/feature_selection.rst	2022-05-19 12:16:26.452782900 +0000
@@ -119,12 +119,12 @@ Recursive feature elimination
 =============================
 
 Given an external estimator that assigns weights to features (e.g., the
-coefficients of a linear model), recursive feature elimination (:class:`RFE`)
+coefficients of a linear model), the goal of recursive feature elimination (:class:`RFE`)
 is to select features by recursively considering smaller and smaller sets of
-features.  First, the estimator is trained on the initial set of features and
-the importance of each feature is obtained either through a ``coef_`` attribute
-or through a ``feature_importances_`` attribute. Then, the least important
-features are pruned from current set of features.That procedure is recursively
+features. First, the estimator is trained on the initial set of features and
+the importance of each feature is obtained either through any specific attribute
+(such as ``coef_``, ``feature_importances_``) or callable. Then, the least important
+features are pruned from current set of features. That procedure is recursively
 repeated on the pruned set until the desired number of features to select is
 eventually reached.
 
@@ -145,10 +145,11 @@ number of features.
 Feature selection using SelectFromModel
 =======================================
 
-:class:`SelectFromModel` is a meta-transformer that can be used along with any
-estimator that has a ``coef_`` or ``feature_importances_`` attribute after fitting.
-The features are considered unimportant and removed, if the corresponding
-``coef_`` or ``feature_importances_`` values are below the provided
+:class:`SelectFromModel` is a meta-transformer that can be used alongside any
+estimator that assigns importance to each feature through a specific attribute (such as
+``coef_``, ``feature_importances_``) or via an `importance_getter` callable after fitting.
+The features are considered unimportant and removed if the corresponding
+importance of the feature values are below the provided
 ``threshold`` parameter. Apart from specifying the threshold numerically,
 there are built-in heuristics for finding a threshold using a string argument.
 Available heuristics are "mean", "median" and float multiples of these like
@@ -159,9 +160,7 @@ For examples on how it is to be used ref
 
 .. topic:: Examples
 
-    * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py`: Selecting the two
-      most important features from the diabetes dataset without knowing the
-      threshold beforehand.
+    * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py`
 
 .. _l1_feature_selection:
 
@@ -173,10 +172,10 @@ L1-based feature selection
 :ref:`Linear models <linear_model>` penalized with the L1 norm have
 sparse solutions: many of their estimated coefficients are zero. When the goal
 is to reduce the dimensionality of the data to use with another classifier,
-they can be used along with :class:`feature_selection.SelectFromModel`
+they can be used along with :class:`~feature_selection.SelectFromModel`
 to select the non-zero coefficients. In particular, sparse estimators useful
-for this purpose are the :class:`linear_model.Lasso` for regression, and
-of :class:`linear_model.LogisticRegression` and :class:`svm.LinearSVC`
+for this purpose are the :class:`~linear_model.Lasso` for regression, and
+of :class:`~linear_model.LogisticRegression` and :class:`~svm.LinearSVC`
 for classification::
 
   >>> from sklearn.svm import LinearSVC
@@ -234,7 +233,7 @@ Tree-based feature selection
 Tree-based estimators (see the :mod:`sklearn.tree` module and forest
 of trees in the :mod:`sklearn.ensemble` module) can be used to compute
 impurity-based feature importances, which in turn can be used to discard irrelevant
-features (when coupled with the :class:`sklearn.feature_selection.SelectFromModel`
+features (when coupled with the :class:`~feature_selection.SelectFromModel`
 meta-transformer)::
 
   >>> from sklearn.ensemble import ExtraTreesClassifier
@@ -261,12 +260,61 @@ meta-transformer)::
     * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py`: example
       on face recognition data.
 
+.. _sequential_feature_selection:
+
+Sequential Feature Selection
+============================
+
+Sequential Feature Selection [sfs]_ (SFS) is available in the
+:class:`~sklearn.feature_selection.SequentialFeatureSelector` transformer.
+SFS can be either forward or backward:
+
+Forward-SFS is a greedy procedure that iteratively finds the best new feature
+to add to the set of selected features. Concretely, we initially start with
+zero feature and find the one feature that maximizes a cross-validated score
+when an estimator is trained on this single feature. Once that first feature
+is selected, we repeat the procedure by adding a new feature to the set of
+selected features. The procedure stops when the desired number of selected
+features is reached, as determined by the `n_features_to_select` parameter.
+
+Backward-SFS follows the same idea but works in the opposite direction:
+instead of starting with no feature and greedily adding features, we start
+with *all* the features and greedily *remove* features from the set. The
+`direction` parameter controls whether forward or backward SFS is used.
+
+In general, forward and backward selection do not yield equivalent results.
+Also, one may be much faster than the other depending on the requested number
+of selected features: if we have 10 features and ask for 7 selected features,
+forward selection would need to perform 7 iterations while backward selection
+would only need to perform 3.
+
+SFS differs from :class:`~sklearn.feature_selection.RFE` and
+:class:`~sklearn.feature_selection.SelectFromModel` in that it does not
+require the underlying model to expose a `coef_` or `feature_importances_`
+attribute. It may however be slower considering that more models need to be
+evaluated, compared to the other approaches. For example in backward
+selection, the iteration going from `m` features to `m - 1` features using k-fold
+cross-validation requires fitting `m * k` models, while
+:class:`~sklearn.feature_selection.RFE` would require only a single fit, and
+:class:`~sklearn.feature_selection.SelectFromModel` always just does a single
+fit and requires no iterations.
+
+.. topic:: Examples
+
+    * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py`
+
+.. topic:: References:
+
+   .. [sfs] Ferri et al, `Comparative study of techniques for
+      large-scale feature selection
+      <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.24.4369&rep=rep1&type=pdf>`_.
+
 Feature selection as part of a pipeline
 =======================================
 
 Feature selection is usually used as a pre-processing step before doing
 the actual learning. The recommended way to do this in scikit-learn is
-to use a :class:`sklearn.pipeline.Pipeline`::
+to use a :class:`~pipeline.Pipeline`::
 
   clf = Pipeline([
     ('feature_selection', SelectFromModel(LinearSVC(penalty="l1"))),
@@ -274,11 +322,11 @@ to use a :class:`sklearn.pipeline.Pipeli
   ])
   clf.fit(X, y)
 
-In this snippet we make use of a :class:`sklearn.svm.LinearSVC`
-coupled with :class:`sklearn.feature_selection.SelectFromModel`
+In this snippet we make use of a :class:`~svm.LinearSVC`
+coupled with :class:`~feature_selection.SelectFromModel`
 to evaluate feature importances and select the most relevant features.
-Then, a :class:`sklearn.ensemble.RandomForestClassifier` is trained on the
+Then, a :class:`~ensemble.RandomForestClassifier` is trained on the
 transformed output, i.e. using only relevant features. You can perform
 similar operations with the other feature selection methods and also
 classifiers that provide a way to evaluate feature importances of course.
-See the :class:`sklearn.pipeline.Pipeline` examples for more details.
+See the :class:`~pipeline.Pipeline` examples for more details.
diff -pruN 0.23.2-5/doc/modules/gaussian_process.rst 1.1.1-1/doc/modules/gaussian_process.rst
--- 0.23.2-5/doc/modules/gaussian_process.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/gaussian_process.rst	2022-05-19 12:16:26.452782900 +0000
@@ -88,14 +88,14 @@ estimate the noise level of data. An ill
 log-marginal-likelihood (LML) landscape shows that there exist two local
 maxima of LML.
 
-.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_001.png
+.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_003.png
    :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html
    :align: center
 
 The first corresponds to a model with a high noise level and a
 large length scale, which explains all variations in the data by noise.
 
-.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_002.png
+.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_004.png
    :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html
    :align: center
 
@@ -106,7 +106,7 @@ hyperparameters, the gradient-based opti
 high-noise solution. It is thus important to repeat the optimization several
 times for different initializations.
 
-.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_003.png
+.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_005.png
    :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html
    :align: center
 
@@ -142,7 +142,7 @@ Moreover, the noise level
 of the data is learned explicitly by GPR by an additional WhiteKernel component
 in the kernel and by the regularization parameter alpha of KRR.
 
-.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_compare_gpr_krr_001.png
+.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_compare_gpr_krr_005.png
    :target: ../auto_examples/gaussian_process/plot_compare_gpr_krr.html
    :align: center
 
@@ -156,9 +156,9 @@ required for fitting and predicting: whi
 the grid-search for hyperparameter optimization scales exponentially with the
 number of hyperparameters ("curse of dimensionality"). The gradient-based
 optimization of the parameters in GPR does not suffer from this exponential
-scaling and is thus considerable faster on this example with 3-dimensional
+scaling and is thus considerably faster on this example with 3-dimensional
 hyperparameter space. The time for predicting is similar; however, generating
-the variance of the predictive distribution of GPR takes considerable longer
+the variance of the predictive distribution of GPR takes considerably longer
 than just predicting the mean.
 
 GPR on Mauna Loa CO2 data
@@ -220,7 +220,7 @@ overall noise level is very small, indic
 explained by the model. The figure shows also that the model makes very
 confident predictions until around 2015
 
-.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_co2_001.png
+.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_co2_003.png
    :target: ../auto_examples/gaussian_process/plot_gpr_co2.html
    :align: center
 
@@ -294,7 +294,7 @@ with different choices of the hyperparam
 predicted probability of GPC with arbitrarily chosen hyperparameters and with
 the hyperparameters corresponding to the maximum log-marginal-likelihood (LML).
 
-While the hyperparameters chosen by optimizing LML have a considerable larger
+While the hyperparameters chosen by optimizing LML have a considerably larger
 LML, they perform slightly worse according to the log-loss on test data. The
 figure shows that this is because they exhibit a steep change of the class
 probabilities at the class boundaries (which is good) but have predicted
@@ -384,13 +384,15 @@ equivalent call to ``__call__``: ``np.di
 
 Kernels are parameterized by a vector :math:`\theta` of hyperparameters. These
 hyperparameters can for instance control length-scales or periodicity of a
-kernel (see below). All kernels support computing analytic gradients 
-of the kernel's auto-covariance with respect to :math:`\theta` via setting
-``eval_gradient=True`` in the ``__call__`` method. This gradient is used by the
-Gaussian process (both regressor and classifier) in computing the gradient
-of the log-marginal-likelihood, which in turn is used to determine the
-value of :math:`\theta`, which maximizes the log-marginal-likelihood,  via
-gradient ascent. For each hyperparameter, the initial value and the
+kernel (see below). All kernels support computing analytic gradients
+of the kernel's auto-covariance with respect to :math:`\log(\theta)` via setting
+``eval_gradient=True`` in the ``__call__`` method.
+That is, a ``(len(X), len(X), len(theta))`` array is returned where the entry
+``[i, j, l]`` contains :math:`\frac{\partial k_\theta(x_i, x_j)}{\partial \log(\theta_l)}`.
+This gradient is used by the Gaussian process (both regressor and classifier)
+in computing the gradient of the log-marginal-likelihood, which in turn is used
+to determine the value of :math:`\theta`, which maximizes the log-marginal-likelihood,
+via gradient ascent. For each hyperparameter, the initial value and the
 bounds need to be specified when creating an instance of the kernel. The
 current value of :math:`\theta` can be get and set via the property
 ``theta`` of the kernel object. Moreover, the bounds of the hyperparameters can be
diff -pruN 0.23.2-5/doc/modules/grid_search.rst 1.1.1-1/doc/modules/grid_search.rst
--- 0.23.2-5/doc/modules/grid_search.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/grid_search.rst	2022-05-19 12:16:26.452782900 +0000
@@ -30,14 +30,18 @@ A search consists of:
 - a cross-validation scheme; and
 - a :ref:`score function <gridsearch_scoring>`.
 
-Some models allow for specialized, efficient parameter search strategies,
-:ref:`outlined below <alternative_cv>`.
-Two generic approaches to sampling search candidates are provided in
+Two generic approaches to parameter search are provided in
 scikit-learn: for given values, :class:`GridSearchCV` exhaustively considers
 all parameter combinations, while :class:`RandomizedSearchCV` can sample a
 given number of candidates from a parameter space with a specified
-distribution. After describing these tools we detail
-:ref:`best practice <grid_search_tips>` applicable to both approaches.
+distribution. Both these tools have successive halving counterparts
+:class:`HalvingGridSearchCV` and :class:`HalvingRandomSearchCV`, which can be
+much faster at finding a good parameter combination.
+
+After describing these tools we detail :ref:`best practices
+<grid_search_tips>` applicable to these approaches. Some models allow for
+specialized, efficient parameter search strategies, outlined in
+:ref:`alternative_cv`.
 
 Note that it is common that a small subset of those parameters can have a large
 impact on the predictive or computation performance of the model while others
@@ -94,13 +98,17 @@ evaluated and the best combination is re
       amount of flexibility in identifying the "best" estimator. This interface
       can also be used in multiple metrics evaluation.
 
+    - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_stats.py`
+      for an example of how to do a statistical comparison on the outputs of
+      :class:`GridSearchCV`.
+
 .. _randomized_parameter_search:
 
 Randomized Parameter Optimization
 =================================
 While using a grid of parameter settings is currently the most widely used
 method for parameter optimization, other search methods have more
-favourable properties.
+favorable properties.
 :class:`RandomizedSearchCV` implements a randomized search over parameters,
 where each setting is sampled from a distribution over possible parameter values.
 This has two main benefits over an exhaustive search:
@@ -167,6 +175,372 @@ variable that is log-uniformly distribut
       Random search for hyper-parameter optimization,
       The Journal of Machine Learning Research (2012)
 
+.. _successive_halving_user_guide:
+
+Searching for optimal parameters with successive halving
+========================================================
+
+Scikit-learn also provides the :class:`HalvingGridSearchCV` and
+:class:`HalvingRandomSearchCV` estimators that can be used to
+search a parameter space using successive halving [1]_ [2]_. Successive
+halving (SH) is like a tournament among candidate parameter combinations.
+SH is an iterative selection process where all candidates (the
+parameter combinations) are evaluated with a small amount of resources at
+the first iteration. Only some of these candidates are selected for the next
+iteration, which will be allocated more resources. For parameter tuning, the
+resource is typically the number of training samples, but it can also be an
+arbitrary numeric parameter such as `n_estimators` in a random forest.
+
+As illustrated in the figure below, only a subset of candidates
+'survive' until the last iteration. These are the candidates that have
+consistently ranked among the top-scoring candidates across all iterations.
+Each iteration is allocated an increasing amount of resources per candidate,
+here the number of samples.
+
+.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_successive_halving_iterations_001.png
+   :target: ../auto_examples/model_selection/plot_successive_halving_iterations.html
+   :align: center
+
+We here briefly describe the main parameters, but each parameter and their
+interactions are described in more detail in the sections below. The
+``factor`` (> 1) parameter controls the rate at which the resources grow, and
+the rate at which the number of candidates decreases. In each iteration, the
+number of resources per candidate is multiplied by ``factor`` and the number
+of candidates is divided by the same factor. Along with ``resource`` and
+``min_resources``, ``factor`` is the most important parameter to control the
+search in our implementation, though a value of 3 usually works well.
+``factor`` effectively controls the number of iterations in
+:class:`HalvingGridSearchCV` and the number of candidates (by default) and
+iterations in :class:`HalvingRandomSearchCV`. ``aggressive_elimination=True``
+can also be used if the number of available resources is small. More control
+is available through tuning the ``min_resources`` parameter.
+
+These estimators are still **experimental**: their predictions
+and their API might change without any deprecation cycle. To use them, you
+need to explicitly import ``enable_halving_search_cv``::
+
+  >>> # explicitly require this experimental feature
+  >>> from sklearn.experimental import enable_halving_search_cv  # noqa
+  >>> # now you can import normally from model_selection
+  >>> from sklearn.model_selection import HalvingGridSearchCV
+  >>> from sklearn.model_selection import HalvingRandomSearchCV
+
+.. topic:: Examples:
+
+    * :ref:`sphx_glr_auto_examples_model_selection_plot_successive_halving_heatmap.py`
+    * :ref:`sphx_glr_auto_examples_model_selection_plot_successive_halving_iterations.py`
+
+Choosing ``min_resources`` and the number of candidates
+-------------------------------------------------------
+
+Besides ``factor``, the two main parameters that influence the behavior of a
+successive halving search are the ``min_resources`` parameter, and the
+number of candidates (or parameter combinations) that are evaluated.
+``min_resources`` is the amount of resources allocated at the first
+iteration for each candidate. The number of candidates is specified directly
+in :class:`HalvingRandomSearchCV`, and is determined from the ``param_grid``
+parameter of :class:`HalvingGridSearchCV`.
+
+Consider a case where the resource is the number of samples, and where we
+have 1000 samples. In theory, with ``min_resources=10`` and ``factor=2``, we
+are able to run **at most** 7 iterations with the following number of
+samples: ``[10, 20, 40, 80, 160, 320, 640]``.
+
+But depending on the number of candidates, we might run less than 7
+iterations: if we start with a **small** number of candidates, the last
+iteration might use less than 640 samples, which means not using all the
+available resources (samples). For example if we start with 5 candidates, we
+only need 2 iterations: 5 candidates for the first iteration, then
+`5 // 2 = 2` candidates at the second iteration, after which we know which
+candidate performs the best (so we don't need a third one). We would only be
+using at most 20 samples which is a waste since we have 1000 samples at our
+disposal. On the other hand, if we start with a **high** number of
+candidates, we might end up with a lot of candidates at the last iteration,
+which may not always be ideal: it means that many candidates will run with
+the full resources, basically reducing the procedure to standard search.
+
+In the case of :class:`HalvingRandomSearchCV`, the number of candidates is set
+by default such that the last iteration uses as much of the available
+resources as possible. For :class:`HalvingGridSearchCV`, the number of
+candidates is determined by the `param_grid` parameter. Changing the value of
+``min_resources`` will impact the number of possible iterations, and as a
+result will also have an effect on the ideal number of candidates.
+
+Another consideration when choosing ``min_resources`` is whether or not it
+is easy to discriminate between good and bad candidates with a small amount
+of resources. For example, if you need a lot of samples to distinguish
+between good and bad parameters, a high ``min_resources`` is recommended. On
+the other hand if the distinction is clear even with a small amount of
+samples, then a small ``min_resources`` may be preferable since it would
+speed up the computation.
+
+Notice in the example above that the last iteration does not use the maximum
+amount of resources available: 1000 samples are available, yet only 640 are
+used, at most. By default, both :class:`HalvingRandomSearchCV` and
+:class:`HalvingGridSearchCV` try to use as many resources as possible in the
+last iteration, with the constraint that this amount of resources must be a
+multiple of both `min_resources` and `factor` (this constraint will be clear
+in the next section). :class:`HalvingRandomSearchCV` achieves this by
+sampling the right amount of candidates, while :class:`HalvingGridSearchCV`
+achieves this by properly setting `min_resources`. Please see
+:ref:`exhausting_the_resources` for details.
+
+.. _amount_of_resource_and_number_of_candidates:
+
+Amount of resource and number of candidates at each iteration
+-------------------------------------------------------------
+
+At any iteration `i`, each candidate is allocated a given amount of resources
+which we denote `n_resources_i`. This quantity is controlled by the
+parameters ``factor`` and ``min_resources`` as follows (`factor` is strictly
+greater than 1)::
+
+    n_resources_i = factor**i * min_resources,
+
+or equivalently::
+
+    n_resources_{i+1} = n_resources_i * factor
+
+where ``min_resources == n_resources_0`` is the amount of resources used at
+the first iteration. ``factor`` also defines the proportions of candidates
+that will be selected for the next iteration::
+
+    n_candidates_i = n_candidates // (factor ** i)
+
+or equivalently::
+
+    n_candidates_0 = n_candidates
+    n_candidates_{i+1} = n_candidates_i // factor
+
+So in the first iteration, we use ``min_resources`` resources
+``n_candidates`` times. In the second iteration, we use ``min_resources *
+factor`` resources ``n_candidates // factor`` times. The third again
+multiplies the resources per candidate and divides the number of candidates.
+This process stops when the maximum amount of resource per candidate is
+reached, or when we have identified the best candidate. The best candidate
+is identified at the iteration that is evaluating `factor` or less candidates
+(see just below for an explanation).
+
+Here is an example with ``min_resources=3`` and ``factor=2``, starting with
+70 candidates:
+
++-----------------------+-----------------------+
+| ``n_resources_i``     | ``n_candidates_i``    |
++=======================+=======================+
+| 3 (=min_resources)    | 70 (=n_candidates)    |
++-----------------------+-----------------------+
+| 3 * 2 = 6             | 70 // 2 = 35          |
++-----------------------+-----------------------+
+| 6 * 2 = 12            | 35 // 2 = 17          |
++-----------------------+-----------------------+
+| 12 * 2 = 24           | 17 // 2 = 8           |
++-----------------------+-----------------------+
+| 24 * 2 = 48           | 8 // 2 = 4            |
++-----------------------+-----------------------+
+| 48 * 2 = 96           | 4 // 2 = 2            |
++-----------------------+-----------------------+
+
+We can note that:
+
+- the process stops at the first iteration which evaluates `factor=2`
+  candidates: the best candidate is the best out of these 2 candidates. It
+  is not necessary to run an additional iteration, since it would only
+  evaluate one candidate (namely the best one, which we have already
+  identified). For this reason, in general, we want the last iteration to
+  run at most ``factor`` candidates. If the last iteration evaluates more
+  than `factor` candidates, then this last iteration reduces to a regular
+  search (as in :class:`RandomizedSearchCV` or :class:`GridSearchCV`).
+- each ``n_resources_i`` is a multiple of both ``factor`` and
+  ``min_resources`` (which is confirmed by its definition above).
+
+The amount of resources that is used at each iteration can be found in the
+`n_resources_` attribute.
+
+Choosing a resource
+-------------------
+
+By default, the resource is defined in terms of number of samples. That is,
+each iteration will use an increasing amount of samples to train on. You can
+however manually specify a parameter to use as the resource with the
+``resource`` parameter. Here is an example where the resource is defined in
+terms of the number of estimators of a random forest::
+
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.ensemble import RandomForestClassifier
+    >>> from sklearn.experimental import enable_halving_search_cv  # noqa
+    >>> from sklearn.model_selection import HalvingGridSearchCV
+    >>> import pandas as pd
+    >>>
+    >>> param_grid = {'max_depth': [3, 5, 10],
+    ...               'min_samples_split': [2, 5, 10]}
+    >>> base_estimator = RandomForestClassifier(random_state=0)
+    >>> X, y = make_classification(n_samples=1000, random_state=0)
+    >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5,
+    ...                          factor=2, resource='n_estimators',
+    ...                          max_resources=30).fit(X, y)
+    >>> sh.best_estimator_
+    RandomForestClassifier(max_depth=5, n_estimators=24, random_state=0)
+
+Note that it is not possible to budget on a parameter that is part of the
+parameter grid.
+
+.. _exhausting_the_resources:
+
+Exhausting the available resources
+----------------------------------
+
+As mentioned above, the number of resources that is used at each iteration
+depends on the `min_resources` parameter.
+If you have a lot of resources available but start with a low number of
+resources, some of them might be wasted (i.e. not used)::
+
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.svm import SVC
+    >>> from sklearn.experimental import enable_halving_search_cv  # noqa
+    >>> from sklearn.model_selection import HalvingGridSearchCV
+    >>> import pandas as pd
+    >>> param_grid= {'kernel': ('linear', 'rbf'),
+    ...              'C': [1, 10, 100]}
+    >>> base_estimator = SVC(gamma='scale')
+    >>> X, y = make_classification(n_samples=1000)
+    >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5,
+    ...                          factor=2, min_resources=20).fit(X, y)
+    >>> sh.n_resources_
+    [20, 40, 80]
+
+The search process will only use 80 resources at most, while our maximum
+amount of available resources is ``n_samples=1000``. Here, we have
+``min_resources = r_0 = 20``.
+
+For :class:`HalvingGridSearchCV`, by default, the `min_resources` parameter
+is set to 'exhaust'. This means that `min_resources` is automatically set
+such that the last iteration can use as many resources as possible, within
+the `max_resources` limit::
+
+    >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5,
+    ...                          factor=2, min_resources='exhaust').fit(X, y)
+    >>> sh.n_resources_
+    [250, 500, 1000]
+
+`min_resources` was here automatically set to 250, which results in the last
+iteration using all the resources. The exact value that is used depends on
+the number of candidate parameters, on `max_resources` and on `factor`.
+
+For :class:`HalvingRandomSearchCV`, exhausting the resources can be done in 2
+ways:
+
+- by setting `min_resources='exhaust'`, just like for
+  :class:`HalvingGridSearchCV`;
+- by setting `n_candidates='exhaust'`.
+
+Both options are mutually exclusive: using `min_resources='exhaust'` requires
+knowing the number of candidates, and symmetrically `n_candidates='exhaust'`
+requires knowing `min_resources`.
+
+In general, exhausting the total number of resources leads to a better final
+candidate parameter, and is slightly more time-intensive.
+
+.. _aggressive_elimination:
+
+Aggressive elimination of candidates
+------------------------------------
+
+Ideally, we want the last iteration to evaluate ``factor`` candidates (see
+:ref:`amount_of_resource_and_number_of_candidates`). We then just have to
+pick the best one. When the number of available resources is small with
+respect to the number of candidates, the last iteration may have to evaluate
+more than ``factor`` candidates::
+
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.svm import SVC
+    >>> from sklearn.experimental import enable_halving_search_cv  # noqa
+    >>> from sklearn.model_selection import HalvingGridSearchCV
+    >>> import pandas as pd
+    >>>
+    >>>
+    >>> param_grid = {'kernel': ('linear', 'rbf'),
+    ...               'C': [1, 10, 100]}
+    >>> base_estimator = SVC(gamma='scale')
+    >>> X, y = make_classification(n_samples=1000)
+    >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5,
+    ...                          factor=2, max_resources=40,
+    ...                          aggressive_elimination=False).fit(X, y)
+    >>> sh.n_resources_
+    [20, 40]
+    >>> sh.n_candidates_
+    [6, 3]
+
+Since we cannot use more than ``max_resources=40`` resources, the process
+has to stop at the second iteration which evaluates more than ``factor=2``
+candidates.
+
+Using the ``aggressive_elimination`` parameter, you can force the search
+process to end up with less than ``factor`` candidates at the last
+iteration. To do this, the process will eliminate as many candidates as
+necessary using ``min_resources`` resources::
+
+    >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5,
+    ...                            factor=2,
+    ...                            max_resources=40,
+    ...                            aggressive_elimination=True,
+    ...                            ).fit(X, y)
+    >>> sh.n_resources_
+    [20, 20,  40]
+    >>> sh.n_candidates_
+    [6, 3, 2]
+
+Notice that we end with 2 candidates at the last iteration since we have
+eliminated enough candidates during the first iterations, using ``n_resources =
+min_resources = 20``.
+
+.. _successive_halving_cv_results:
+
+Analyzing results with the `cv_results_` attribute
+--------------------------------------------------
+
+The ``cv_results_`` attribute contains useful information for analyzing the
+results of a search. It can be converted to a pandas dataframe with ``df =
+pd.DataFrame(est.cv_results_)``. The ``cv_results_`` attribute of
+:class:`HalvingGridSearchCV` and :class:`HalvingRandomSearchCV` is similar
+to that of :class:`GridSearchCV` and :class:`RandomizedSearchCV`, with
+additional information related to the successive halving process.
+
+Here is an example with some of the columns of a (truncated) dataframe:
+
+====  ======  ===============  =================  ========================================================================================
+  ..    iter      n_resources    mean_test_score  params
+====  ======  ===============  =================  ========================================================================================
+   0       0              125           0.983667  {'criterion': 'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 5}
+   1       0              125           0.983667  {'criterion': 'gini', 'max_depth': None, 'max_features': 8, 'min_samples_split': 7}
+   2       0              125           0.983667  {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 10}
+   3       0              125           0.983667  {'criterion': 'log_loss', 'max_depth': None, 'max_features': 6, 'min_samples_split': 6}
+ ...     ...              ...                ...  ...
+  15       2              500           0.951958  {'criterion': 'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 10}
+  16       2              500           0.947958  {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 10}
+  17       2              500           0.951958  {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 4}
+  18       3             1000           0.961009  {'criterion': 'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 10}
+  19       3             1000           0.955989  {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 4}
+====  ======  ===============  =================  ========================================================================================
+
+Each row corresponds to a given parameter combination (a candidate) and a given
+iteration. The iteration is given by the ``iter`` column. The ``n_resources``
+column tells you how many resources were used.
+
+In the example above, the best parameter combination is ``{'criterion':
+'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 10}``
+since it has reached the last iteration (3) with the highest score:
+0.96.
+
+.. topic:: References:
+
+    .. [1] K. Jamieson, A. Talwalkar,
+       `Non-stochastic Best Arm Identification and Hyperparameter
+       Optimization <http://proceedings.mlr.press/v51/jamieson16.html>`_, in
+       proc. of Machine Learning Research, 2016.
+    .. [2] L. Li, K. Jamieson, G. DeSalvo, A. Rostamizadeh, A. Talwalkar,
+       :arxiv:`Hyperband: A Novel Bandit-Based Approach to Hyperparameter Optimization
+       <1603.06560>`, in Machine Learning Research 18, 2018.
+
 .. _grid_search_tips:
 
 Tips for parameter search
@@ -183,18 +557,16 @@ to evaluate a parameter setting. These a
 :func:`sklearn.metrics.r2_score` for regression.  For some applications,
 other scoring functions are better suited (for example in unbalanced
 classification, the accuracy score is often uninformative). An alternative
-scoring function can be specified via the ``scoring`` parameter to
-:class:`GridSearchCV`, :class:`RandomizedSearchCV` and many of the
-specialized cross-validation tools described below.
-See :ref:`scoring_parameter` for more details.
+scoring function can be specified via the ``scoring`` parameter of most
+parameter search tools. See :ref:`scoring_parameter` for more details.
 
 .. _multimetric_grid_search:
 
 Specifying multiple metrics for evaluation
 ------------------------------------------
 
-``GridSearchCV`` and ``RandomizedSearchCV`` allow specifying multiple metrics
-for the ``scoring`` parameter.
+:class:`GridSearchCV` and :class:`RandomizedSearchCV` allow specifying
+multiple metrics for the ``scoring`` parameter.
 
 Multimetric scoring can either be specified as a list of strings of predefined
 scores names or a dict mapping the scorer name to the scorer function and/or
@@ -209,6 +581,9 @@ result in an error when using multiple m
 See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation.py`
 for an example usage.
 
+:class:`HalvingRandomSearchCV` and :class:`HalvingGridSearchCV` do not support
+multimetric scoring.
+
 .. _composite_grid_search:
 
 Composite estimators and parameter spaces
@@ -253,6 +628,8 @@ levels of nesting::
   ...    'model__base_estimator__max_depth': [2, 4, 6, 8]}
   >>> search = GridSearchCV(pipe, param_grid, cv=5).fit(X, y)
 
+Please refer to :ref:`pipeline` for performing parameter searches over
+pipelines.
 
 Model selection: development and evaluation
 -------------------------------------------
@@ -263,7 +640,7 @@ to use the labeled data to "train" the p
 When evaluating the resulting model it is important to do it on
 held-out samples that were not seen during the grid search process:
 it is recommended to split the data into a **development set** (to
-be fed to the ``GridSearchCV`` instance) and an **evaluation set**
+be fed to the :class:`GridSearchCV` instance) and an **evaluation set**
 to compute performance metrics.
 
 This can be done by using the :func:`train_test_split`
@@ -272,10 +649,10 @@ utility function.
 Parallelism
 -----------
 
-:class:`GridSearchCV` and :class:`RandomizedSearchCV` evaluate each parameter
-setting independently.  Computations can be run in parallel if your OS
-supports it, by using the keyword ``n_jobs=-1``. See function signature for
-more details.
+The parameter search tools evaluate each parameter combination on each data
+fold independently. Computations can be run in parallel by using the keyword
+``n_jobs=-1``. See function signature for more details, and also the Glossary
+entry for :term:`n_jobs`.
 
 Robustness to failure
 ---------------------
@@ -310,8 +687,6 @@ Here is the list of such models:
 .. currentmodule:: sklearn
 
 .. autosummary::
-   :toctree: generated/
-   :template: class.rst
 
    linear_model.ElasticNetCV
    linear_model.LarsCV
@@ -337,8 +712,6 @@ Criterion (AIC) or the Bayesian Informat
 model selection:
 
 .. autosummary::
-   :toctree: generated/
-   :template: class.rst
 
    linear_model.LassoLarsIC
 
@@ -361,8 +734,6 @@ model selection.
 This is currently implemented in the following classes:
 
 .. autosummary::
-   :toctree: generated/
-   :template: class.rst
 
     ensemble.RandomForestClassifier
     ensemble.RandomForestRegressor
diff -pruN 0.23.2-5/doc/modules/impute.rst 1.1.1-1/doc/modules/impute.rst
--- 0.23.2-5/doc/modules/impute.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/impute.rst	2022-05-19 12:16:26.452782900 +0000
@@ -285,14 +285,14 @@ some missing values to it.
   >>> from sklearn.pipeline import FeatureUnion, make_pipeline
   >>> from sklearn.tree import DecisionTreeClassifier
   >>> X, y = load_iris(return_X_y=True)
-  >>> mask = np.random.randint(0, 2, size=X.shape).astype(np.bool)
+  >>> mask = np.random.randint(0, 2, size=X.shape).astype(bool)
   >>> X[mask] = np.nan
   >>> X_train, X_test, y_train, _ = train_test_split(X, y, test_size=100,
   ...                                                random_state=0)
 
 Now we create a :class:`FeatureUnion`. All features will be imputed using
 :class:`SimpleImputer`, in order to enable classifiers to work with this data.
-Additionally, it adds the the indicator variables from
+Additionally, it adds the indicator variables from
 :class:`MissingIndicator`.
 
   >>> transformer = FeatureUnion(
@@ -313,3 +313,12 @@ wrap this in a :class:`Pipeline` with a
   >>> results = clf.predict(X_test)
   >>> results.shape
   (100,)
+
+Estimators that handle NaN values
+=================================
+
+Some estimators are designed to handle NaN values without preprocessing. 
+Below is the list of these estimators, classified by type 
+(cluster, regressor, classifier, transform):
+
+.. allow_nan_estimators::
diff -pruN 0.23.2-5/doc/modules/kernel_approximation.rst 1.1.1-1/doc/modules/kernel_approximation.rst
--- 0.23.2-5/doc/modules/kernel_approximation.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/kernel_approximation.rst	2022-05-19 12:16:26.452782900 +0000
@@ -149,6 +149,51 @@ above for the :class:`RBFSampler`. The o
 parameter, that is called :math:`c`.
 For a motivation for this mapping and the mathematical details see [LS2010]_.
 
+.. _polynomial_kernel_approx:
+
+Polynomial Kernel Approximation via Tensor Sketch
+-------------------------------------------------
+
+The :ref:`polynomial kernel <polynomial_kernel>` is a popular type of kernel
+function given by:
+
+.. math::
+
+        k(x, y) = (\gamma x^\top y +c_0)^d
+
+where:
+
+    * ``x``, ``y`` are the input vectors
+    * ``d`` is the kernel degree
+
+Intuitively, the feature space of the polynomial kernel of degree `d`
+consists of all possible degree-`d` products among input features, which enables
+learning algorithms using this kernel to account for interactions between features.
+
+The TensorSketch [PP2013]_ method, as implemented in :class:`PolynomialCountSketch`, is a
+scalable, input data independent method for polynomial kernel approximation.
+It is based on the concept of Count sketch [WIKICS]_ [CCF2002]_ , a dimensionality
+reduction technique similar to feature hashing, which instead uses several
+independent hash functions. TensorSketch obtains a Count Sketch of the outer product
+of two vectors (or a vector with itself), which can be used as an approximation of the
+polynomial kernel feature space. In particular, instead of explicitly computing
+the outer product, TensorSketch computes the Count Sketch of the vectors and then
+uses polynomial multiplication via the Fast Fourier Transform to compute the
+Count Sketch of their outer product.
+
+Conveniently, the training phase of TensorSketch simply consists of initializing
+some random variables. It is thus independent of the input data, i.e. it only
+depends on the number of input features, but not the data values.
+In addition, this method can transform samples in
+:math:`\mathcal{O}(n_{\text{samples}}(n_{\text{features}} + n_{\text{components}} \log(n_{\text{components}})))`
+time, where :math:`n_{\text{components}}` is the desired output dimension,
+determined by ``n_components``.
+
+.. topic:: Examples:
+
+    * :ref:`sphx_glr_auto_examples_kernel_approximation_plot_scalable_poly_kernels.py`
+
+.. _tensor_sketch_kernel_approx:
 
 Mathematical Details
 --------------------
@@ -189,10 +234,10 @@ or store training examples.
 .. topic:: References:
 
     .. [RR2007] `"Random features for large-scale kernel machines"
-      <https://www.robots.ox.ac.uk/~vgg/rg/papers/randomfeatures.pdf>`_
+      <https://papers.nips.cc/paper/2007/hash/013a006f03dbc5392effeb8f18fda755-Abstract.html>`_
       Rahimi, A. and Recht, B. - Advances in neural information processing 2007,
     .. [LS2010] `"Random Fourier approximations for skewed multiplicative histogram kernels"
-      <http://www.maths.lth.se/matematiklth/personal/sminchis/papers/lis_dagm10.pdf>`_
+      <https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.227.1802&rep=rep1&type=pdf>`_
       Random Fourier approximations for skewed multiplicative histogram kernels
       - Lecture Notes for Computer Sciencd (DAGM)
     .. [VZ2010] `"Efficient additive kernels via explicit feature maps"
@@ -201,3 +246,11 @@ or store training examples.
     .. [VVZ2010] `"Generalized RBF feature maps for Efficient Detection"
       <https://www.robots.ox.ac.uk/~vgg/publications/2010/Sreekanth10/sreekanth10.pdf>`_
       Vempati, S. and Vedaldi, A. and Zisserman, A. and Jawahar, CV - 2010
+    .. [PP2013] :doi:`"Fast and scalable polynomial kernels via explicit feature maps"
+      <10.1145/2487575.2487591>`
+      Pham, N., & Pagh, R. - 2013
+    .. [CCF2002] `"Finding frequent items in data streams"
+      <http://www.cs.princeton.edu/courses/archive/spring04/cos598B/bib/CharikarCF.pdf>`_
+      Charikar, M., Chen, K., & Farach-Colton - 2002
+    .. [WIKICS] `"Wikipedia: Count sketch"
+      <https://en.wikipedia.org/wiki/Count_sketch>`_
diff -pruN 0.23.2-5/doc/modules/label_propagation.rst 1.1.1-1/doc/modules/label_propagation.rst
--- 0.23.2-5/doc/modules/label_propagation.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/label_propagation.rst	1970-01-01 00:00:00.000000000 +0000
@@ -1,100 +0,0 @@
-.. _semi_supervised:
-
-===================================================
-Semi-Supervised
-===================================================
-
-.. currentmodule:: sklearn.semi_supervised
-
-`Semi-supervised learning
-<https://en.wikipedia.org/wiki/Semi-supervised_learning>`_ is a situation
-in which in your training data some of the samples are not labeled. The
-semi-supervised estimators in :mod:`sklearn.semi_supervised` are able to
-make use of this additional unlabeled data to better capture the shape of
-the underlying data distribution and generalize better to new samples.
-These algorithms can perform well when we have a very small amount of
-labeled points and a large amount of unlabeled points.
-
-.. topic:: Unlabeled entries in `y`
-
-    It is important to assign an identifier to unlabeled points along with the
-    labeled data when training the model with the ``fit`` method. The identifier
-    that this implementation uses is the integer value :math:`-1`.
-
-.. _label_propagation:
-
-Label Propagation
-=================
-
-Label propagation denotes a few variations of semi-supervised graph
-inference algorithms. 
-
-A few features available in this model:
-  * Can be used for classification and regression tasks
-  * Kernel methods to project data into alternate dimensional spaces
-
-`scikit-learn` provides two label propagation models:
-:class:`LabelPropagation` and :class:`LabelSpreading`. Both work by
-constructing a similarity graph over all items in the input dataset. 
-
-.. figure:: ../auto_examples/semi_supervised/images/sphx_glr_plot_label_propagation_structure_001.png
-    :target: ../auto_examples/semi_supervised/plot_label_propagation_structure.html
-    :align: center
-    :scale: 60%
-
-    **An illustration of label-propagation:** *the structure of unlabeled
-    observations is consistent with the class structure, and thus the
-    class label can be propagated to the unlabeled observations of the
-    training set.*
-
-:class:`LabelPropagation` and :class:`LabelSpreading`
-differ in modifications to the similarity matrix that graph and the
-clamping effect on the label distributions.
-Clamping allows the algorithm to change the weight of the true ground labeled
-data to some degree. The :class:`LabelPropagation` algorithm performs hard
-clamping of input labels, which means :math:`\alpha=0`. This clamping factor
-can be relaxed, to say :math:`\alpha=0.2`, which means that we will always
-retain 80 percent of our original label distribution, but the algorithm gets to
-change its confidence of the distribution within 20 percent.
-
-:class:`LabelPropagation` uses the raw similarity matrix constructed from
-the data with no modifications. In contrast, :class:`LabelSpreading`
-minimizes a loss function that has regularization properties, as such it
-is often more robust to noise. The algorithm iterates on a modified
-version of the original graph and normalizes the edge weights by
-computing the normalized graph Laplacian matrix. This procedure is also
-used in :ref:`spectral_clustering`.
-
-Label propagation models have two built-in kernel methods. Choice of kernel
-effects both scalability and performance of the algorithms. The following are
-available:
-
-  * rbf (:math:`\exp(-\gamma |x-y|^2), \gamma > 0`). :math:`\gamma` is
-    specified by keyword gamma.
-
-  * knn (:math:`1[x' \in kNN(x)]`). :math:`k` is specified by keyword
-    n_neighbors.
-
-The RBF kernel will produce a fully connected graph which is represented in memory
-by a dense matrix. This matrix may be very large and combined with the cost of
-performing a full matrix multiplication calculation for each iteration of the
-algorithm can lead to prohibitively long running times. On the other hand,
-the KNN kernel will produce a much more memory-friendly sparse matrix
-which can drastically reduce running times.
-
-.. topic:: Examples
-
-  * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_versus_svm_iris.py`
-  * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_structure.py`
-  * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits.py`
-  * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits_active_learning.py`
-
-.. topic:: References
-
-    [1] Yoshua Bengio, Olivier Delalleau, Nicolas Le Roux. In Semi-Supervised
-    Learning (2006), pp. 193-216
-
-    [2] Olivier Delalleau, Yoshua Bengio, Nicolas Le Roux. Efficient
-    Non-Parametric Function Induction in Semi-Supervised Learning. AISTAT 2005
-    https://research.microsoft.com/en-us/people/nicolasl/efficient_ssl.pdf
-
diff -pruN 0.23.2-5/doc/modules/lda_qda.rst 1.1.1-1/doc/modules/lda_qda.rst
--- 0.23.2-5/doc/modules/lda_qda.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/lda_qda.rst	2022-05-19 12:16:26.452782900 +0000
@@ -136,7 +136,7 @@ Mathematical formulation of LDA dimensio
 
 First note that the K means :math:`\mu_k` are vectors in
 :math:`\mathcal{R}^d`, and they lie in an affine subspace :math:`H` of
-dimension at least :math:`K - 1` (2 points lie on a line, 3 points lie on a
+dimension at most :math:`K - 1` (2 points lie on a line, 3 points lie on a
 plane, etc).
 
 As mentioned above, we can interpret LDA as assigning :math:`x` to the class
@@ -163,8 +163,8 @@ transformed class means :math:`\mu^*_k`)
 :func:`~discriminant_analysis.LinearDiscriminantAnalysis.transform` method. See
 [1]_ for more details.
 
-Shrinkage
-=========
+Shrinkage and Covariance Estimator
+==================================
 
 Shrinkage is a form of regularization used to improve the estimation of
 covariance matrices in situations where the number of training samples is
@@ -187,12 +187,33 @@ an estimate for the covariance matrix).
 between these two extrema will estimate a shrunk version of the covariance
 matrix.
 
+The shrunk Ledoit and Wolf estimator of covariance may not always be the
+best choice. For example if the distribution of the data
+is normally distributed, the
+Oracle Approximating Shrinkage estimator :class:`sklearn.covariance.OAS`
+yields a smaller Mean Squared Error than the one given by Ledoit and Wolf's
+formula used with shrinkage="auto". In LDA, the data are assumed to be gaussian
+conditionally to the class. If these assumptions hold, using LDA with
+the OAS estimator of covariance will yield a better classification 
+accuracy than if Ledoit and Wolf or the empirical covariance estimator is used.
+
+The covariance estimator can be chosen using the ``covariance_estimator``
+parameter of the :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+class. A covariance estimator should have a :term:`fit` method and a
+``covariance_`` attribute like all covariance estimators in the
+:mod:`sklearn.covariance` module.
+
+
 .. |shrinkage| image:: ../auto_examples/classification/images/sphx_glr_plot_lda_001.png
         :target: ../auto_examples/classification/plot_lda.html
         :scale: 75
 
 .. centered:: |shrinkage|
 
+.. topic:: Examples:
+
+    :ref:`sphx_glr_auto_examples_classification_plot_lda.py`: Comparison of LDA classifiers
+    with Empirical, Ledoit Wolf and OAS covariance estimator.
 
 Estimation algorithms
 =====================
@@ -211,16 +232,17 @@ solver may be preferable in situations w
 The 'svd' solver cannot be used with shrinkage.
 For QDA, the use of the SVD solver relies on the fact that the covariance
 matrix :math:`\Sigma_k` is, by definition, equal to :math:`\frac{1}{n - 1}
-X_k^tX_k = V S^2 V^t` where :math:`V` comes from the SVD of the (centered)
+X_k^tX_k = \frac{1}{n - 1} V S^2 V^t` where :math:`V` comes from the SVD of the (centered)
 matrix: :math:`X_k = U S V^t`. It turns out that we can compute the
-log-posterior above without having to explictly compute :math:`\Sigma`:
+log-posterior above without having to explicitly compute :math:`\Sigma`:
 computing :math:`S` and :math:`V` via the SVD of :math:`X` is enough. For
 LDA, two SVDs are computed: the SVD of the centered input matrix :math:`X`
 and the SVD of the class-wise mean vectors.
 
 The 'lsqr' solver is an efficient algorithm that only works for
 classification. It needs to explicitly compute the covariance matrix
-:math:`\Sigma`, and supports shrinkage. This solver computes the coefficients
+:math:`\Sigma`, and supports shrinkage and custom covariance estimators.
+This solver computes the coefficients
 :math:`\omega_k = \Sigma^{-1}\mu_k` by solving for :math:`\Sigma \omega =
 \mu_k`, thus avoiding the explicit computation of the inverse
 :math:`\Sigma^{-1}`.
@@ -231,11 +253,6 @@ transform, and it supports shrinkage. Ho
 compute the covariance matrix, so it might not be suitable for situations with
 a high number of features.
 
-.. topic:: Examples:
-
-    :ref:`sphx_glr_auto_examples_classification_plot_lda.py`: Comparison of LDA classifiers
-    with and without shrinkage.
-
 .. topic:: References:
 
    .. [1] "The Elements of Statistical Learning", Hastie T., Tibshirani R.,
diff -pruN 0.23.2-5/doc/modules/learning_curve.rst 1.1.1-1/doc/modules/learning_curve.rst
--- 0.23.2-5/doc/modules/learning_curve.rst	2020-08-04 12:12:58.876675400 +0000
+++ 1.1.1-1/doc/modules/learning_curve.rst	2022-05-19 12:16:26.452782900 +0000
@@ -53,9 +53,9 @@ Validation curve
 
 To validate a model we need a scoring function (see :ref:`model_evaluation`),
 for example accuracy for classifiers. The proper way of choosing multiple
-hyperparameters of an estimator are of course grid search or similar methods
+hyperparameters of an estimator is of course grid search or similar methods
 (see :ref:`grid_search`) that select the hyperparameter with the maximum score
-on a validation set or multiple validation sets. Note that if we optimized
+on a validation set or multiple validation sets. Note that if we optimize
 the hyperparameters based on a validation score the validation score is biased
 and not a good estimate of the generalization any longer. To get a proper
 estimate of the generalization we have to compute the score on another test
@@ -79,9 +79,9 @@ The function :func:`validation_curve` ca
   >>> np.random.shuffle(indices)
   >>> X, y = X[indices], y[indices]
 
-  >>> train_scores, valid_scores = validation_curve(Ridge(), X, y, "alpha",
-  ...                                               np.logspace(-7, 3, 3),
-  ...                                               cv=5)
+  >>> train_scores, valid_scores = validation_curve(
+  ...     Ridge(), X, y, param_name="alpha", param_range=np.logspace(-7, 3, 3),
+  ...     cv=5)
   >>> train_scores
   array([[0.93..., 0.94..., 0.92..., 0.91..., 0.92...],
          [0.93..., 0.94..., 0.92..., 0.91..., 0.92...],
@@ -94,9 +94,9 @@ The function :func:`validation_curve` ca
 If the training score and the validation score are both low, the estimator will
 be underfitting. If the training score is high and the validation score is low,
 the estimator is overfitting and otherwise it is working very well. A low
-training score and a high validation score is usually not possible. All three
-cases can be found in the plot below where we vary the parameter
-:math:`\gamma` of an SVM on the digits dataset.
+training score and a high validation score is usually not possible. Underfitting, 
+overfitting, and a working model are shown in the plot below where we vary
+the parameter :math:`\gamma` of an SVM on the digits dataset.
 
 .. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_validation_curve_001.png
    :target: ../auto_examples/model_selection/plot_validation_curve.html
diff -pruN 0.23.2-5/doc/modules/linear_model.rst 1.1.1-1/doc/modules/linear_model.rst
--- 0.23.2-5/doc/modules/linear_model.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/linear_model.rst	2022-05-19 12:16:26.452782900 +0000
@@ -50,7 +50,7 @@ and will store the coefficients :math:`w
 
 The coefficient estimates for Ordinary Least Squares rely on the
 independence of the features. When features are correlated and the
-columns of the design matrix :math:`X` have an approximate linear
+columns of the design matrix :math:`X` have an approximately linear
 dependence, the design matrix becomes close to singular
 and as a result, the least-squares estimate becomes highly sensitive
 to random errors in the observed target, producing a large
@@ -61,6 +61,19 @@ example, when data are collected without
 
    * :ref:`sphx_glr_auto_examples_linear_model_plot_ols.py`
 
+Non-Negative Least Squares
+--------------------------
+
+It is possible to constrain all the coefficients to be non-negative, which may
+be useful when they represent some physical or naturally non-negative
+quantities (e.g., frequency counts or prices of goods).
+:class:`LinearRegression` accepts a boolean ``positive``
+parameter: when set to `True`, `Non-Negative Least Squares
+<https://en.wikipedia.org/wiki/Non-negative_least_squares>`_ are then applied.
+
+.. topic:: Examples:
+
+   * :ref:`sphx_glr_auto_examples_linear_model_plot_nnls.py`
 
 Ordinary Least Squares Complexity
 ---------------------------------
@@ -127,15 +140,15 @@ the output with the highest value.
 
 It might seem questionable to use a (penalized) Least Squares loss to fit a
 classification model instead of the more traditional logistic or hinge
-losses. However in practice all those models can lead to similar
+losses. However, in practice, all those models can lead to similar
 cross-validation scores in terms of accuracy or precision/recall, while the
 penalized least squares loss used by the :class:`RidgeClassifier` allows for
 a very different choice of the numerical solvers with distinct computational
 performance profiles.
 
 The :class:`RidgeClassifier` can be significantly faster than e.g.
-:class:`LogisticRegression` with a high number of classes, because it is
-able to compute the projection matrix :math:`(X^T X)^{-1} X^T` only once.
+:class:`LogisticRegression` with a high number of classes because it can
+compute the projection matrix :math:`(X^T X)^{-1} X^T` only once.
 
 This classifier is sometimes referred to as a `Least Squares Support Vector
 Machines
@@ -160,13 +173,12 @@ This method has the same order of comple
 .. between these
 
 
-Setting the regularization parameter: generalized Cross-Validation
-------------------------------------------------------------------
+Setting the regularization parameter: leave-one-out Cross-Validation
+--------------------------------------------------------------------
 
 :class:`RidgeCV` implements ridge regression with built-in
 cross-validation of the alpha parameter. The object works in the same way
-as GridSearchCV except that it defaults to Generalized Cross-Validation
-(GCV), an efficient form of leave-one-out cross-validation::
+as GridSearchCV except that it defaults to Leave-One-Out Cross-Validation::
 
     >>> import numpy as np
     >>> from sklearn import linear_model
@@ -179,7 +191,7 @@ as GridSearchCV except that it defaults
 
 Specifying the value of the :term:`cv` attribute will trigger the use of
 cross-validation with :class:`~sklearn.model_selection.GridSearchCV`, for
-example `cv=10` for 10-fold cross-validation, rather than Generalized
+example `cv=10` for 10-fold cross-validation, rather than Leave-One-Out
 Cross-Validation.
 
 .. topic:: References
@@ -198,7 +210,7 @@ Lasso
 The :class:`Lasso` is a linear model that estimates sparse coefficients.
 It is useful in some contexts due to its tendency to prefer solutions
 with fewer non-zero coefficients, effectively reducing the number of
-features upon which the given solution is dependent. For this reason
+features upon which the given solution is dependent. For this reason,
 Lasso and its variants are fundamental to the field of compressed sensing.
 Under certain conditions, it can recover the exact set of non-zero
 coefficients (see
@@ -286,6 +298,7 @@ features, it is often faster than :class
 
 .. centered:: |lasso_cv_1| |lasso_cv_2|
 
+.. _lasso_lars_ic:
 
 Information-criteria based model selection
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -294,22 +307,92 @@ Alternatively, the estimator :class:`Las
 Akaike information criterion (AIC) and the Bayes Information criterion (BIC).
 It is a computationally cheaper alternative to find the optimal value of alpha
 as the regularization path is computed only once instead of k+1 times
-when using k-fold cross-validation. However, such criteria needs a
-proper estimation of the degrees of freedom of the solution, are
-derived for large samples (asymptotic results) and assume the model
-is correct, i.e. that the data are actually generated by this model.
-They also tend to break when the problem is badly conditioned
-(more features than samples).
+when using k-fold cross-validation.
 
-.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_lasso_model_selection_001.png
-    :target: ../auto_examples/linear_model/plot_lasso_model_selection.html
+Indeed, these criteria are computed on the in-sample training set. In short,
+they penalize the over-optimistic scores of the different Lasso models by
+their flexibility (cf. the "Mathematical details" section below).
+
+However, such criteria need a proper estimation of the degrees of freedom of
+the solution, are derived for large samples (asymptotic results) and assume the
+correct model is among the candidates under investigation. They also tend to break when
+the problem is badly conditioned (e.g. more features than samples).
+
+.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_lasso_lars_ic_001.png
+    :target: ../auto_examples/linear_model/plot_lasso_lars_ic.html
     :align: center
     :scale: 50%
 
+.. _aic_bic:
+
+**Mathematical details**
+
+The definition of AIC (and thus BIC) might differ in the literature. In this
+section, we give more information regarding the criterion computed in
+scikit-learn. The AIC criterion is defined as:
+
+.. math::
+    AIC = -2 \log(\hat{L}) + 2 d
+
+where :math:`\hat{L}` is the maximum likelihood of the model and
+:math:`d` is the number of parameters (as well referred to as degrees of
+freedom in the previous section).
+
+The definition of BIC replaces the constant :math:`2` by :math:`\log(N)`:
+
+.. math::
+    BIC = -2 \log(\hat{L}) + \log(N) d
+
+where :math:`N` is the number of samples.
+
+For a linear Gaussian model, the maximum log-likelihood is defined as:
+
+.. math::
+    \log(\hat{L}) = - \frac{n}{2} \log(2 \pi) - \frac{n}{2} \ln(\sigma^2) - \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{2\sigma^2}
+
+where :math:`\sigma^2` is an estimate of the noise variance,
+:math:`y_i` and :math:`\hat{y}_i` are respectively the true and predicted
+targets, and :math:`n` is the number of samples.
+
+Plugging the maximum log-likelihood in the AIC formula yields:
+
+.. math::
+    AIC = n \log(2 \pi \sigma^2) + \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{\sigma^2} + 2 d
+
+The first term of the above expression is sometimes discarded since it is a
+constant when :math:`\sigma^2` is provided. In addition,
+it is sometimes stated that the AIC is equivalent to the :math:`C_p` statistic
+[12]_. In a strict sense, however, it is equivalent only up to some constant
+and a multiplicative factor.
+
+At last, we mentioned above that :math:`\sigma^2` is an estimate of the
+noise variance. In :class:`LassoLarsIC` when the parameter `noise_variance` is
+not provided (default), the noise variance is estimated via the unbiased
+estimator [13]_ defined as:
+
+.. math::
+    \sigma^2 = \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{n - p}
+
+where :math:`p` is the number of features and :math:`\hat{y}_i` is the
+predicted target using an ordinary least squares regression. Note, that this
+formula is valid only when `n_samples > n_features`.
 
 .. topic:: Examples:
 
   * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py`
+  * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lars_ic.py`
+
+.. topic:: References
+
+  .. [12] :arxiv:`Zou, Hui, Trevor Hastie, and Robert Tibshirani.
+           "On the degrees of freedom of the lasso."
+           The Annals of Statistics 35.5 (2007): 2173-2192.
+           <0712.0881.pdf>`
+
+  .. [13] `Cherkassky, Vladimir, and Yunqian Ma.
+           "Comparison of model selection for regression."
+           Neural computation 15.7 (2003): 1691-1714.
+           <https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.392.8794&rep=rep1&type=pdf>`_
 
 Comparison with the regularization parameter of SVM
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -355,7 +438,7 @@ Mathematically, it consists of a linear
 :math:`\ell_1` :math:`\ell_2`-norm for regularization.
 The objective function to minimize is:
 
-.. math::  \min_{w} { \frac{1}{2n_{\text{samples}}} ||X W - Y||_{\text{Fro}} ^ 2 + \alpha ||W||_{21}}
+.. math::  \min_{W} { \frac{1}{2n_{\text{samples}}} ||X W - Y||_{\text{Fro}} ^ 2 + \alpha ||W||_{21}}
 
 where :math:`\text{Fro}` indicates the Frobenius norm
 
@@ -381,7 +464,7 @@ the regularization properties of :class:
 combination of :math:`\ell_1` and :math:`\ell_2` using the ``l1_ratio``
 parameter.
 
-Elastic-net is useful when there are multiple features which are
+Elastic-net is useful when there are multiple features that are
 correlated with one another. Lasso is likely to pick one of these
 at random, while elastic-net is likely to pick both.
 
@@ -488,7 +571,7 @@ The disadvantages of the LARS method inc
     in the discussion section of the Efron et al. (2004) Annals of
     Statistics article.
 
-The LARS model can be used using estimator :class:`Lars`, or its
+The LARS model can be used via the estimator :class:`Lars`, or its
 low-level implementation :func:`lars_path` or :func:`lars_path_gram`.
 
 
@@ -508,11 +591,11 @@ function of the norm of its coefficients
 ::
 
    >>> from sklearn import linear_model
-   >>> reg = linear_model.LassoLars(alpha=.1)
+   >>> reg = linear_model.LassoLars(alpha=.1, normalize=False)
    >>> reg.fit([[0, 0], [1, 1]], [0, 1])
-   LassoLars(alpha=0.1)
+   LassoLars(alpha=0.1, normalize=False)
    >>> reg.coef_
-   array([0.717157..., 0.        ])
+   array([0.6..., 0.        ])
 
 .. topic:: Examples:
 
@@ -534,7 +617,7 @@ the residual.
 Instead of giving a vector result, the LARS solution consists of a
 curve denoting the solution for each value of the :math:`\ell_1` norm of the
 parameter vector. The full coefficients path is stored in the array
-``coef_path_``, which has size (n_features, max_features+1). The first
+``coef_path_`` of shape `(n_features, max_features + 1)`. The first
 column is always zero.
 
 .. topic:: References:
@@ -582,7 +665,7 @@ previously chosen dictionary elements.
  * https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
 
  * `Matching pursuits with time-frequency dictionaries
-   <http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf>`_,
+   <https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf>`_,
    S. G. Mallat, Z. Zhang,
 
 
@@ -663,13 +746,6 @@ There are four more hyperparameters, :ma
 :math:`\alpha` and :math:`\lambda`. These are usually chosen to be
 *non-informative*. By default :math:`\alpha_1 = \alpha_2 =  \lambda_1 = \lambda_2 = 10^{-6}`.
 
-
-.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_bayesian_ridge_001.png
-   :target: ../auto_examples/linear_model/plot_bayesian_ridge.html
-   :align: center
-   :scale: 50%
-
-
 Bayesian Ridge Regression is used for regression::
 
     >>> from sklearn import linear_model
@@ -695,7 +771,6 @@ is more robust to ill-posed problems.
 
 .. topic:: Examples:
 
- * :ref:`sphx_glr_auto_examples_linear_model_plot_bayesian_ridge.py`
  * :ref:`sphx_glr_auto_examples_linear_model_plot_bayesian_ridge_curvefit.py`
 
 .. topic:: References:
@@ -706,37 +781,35 @@ is more robust to ill-posed problems.
 
     * Michael E. Tipping, `Sparse Bayesian Learning and the Relevance Vector Machine <http://www.jmlr.org/papers/volume1/tipping01a/tipping01a.pdf>`_, 2001.
 
+.. _automatic_relevance_determination:
 
 Automatic Relevance Determination - ARD
 ---------------------------------------
 
-:class:`ARDRegression` is very similar to `Bayesian Ridge Regression`_,
-but can lead to sparser coefficients :math:`w` [1]_ [2]_.
-:class:`ARDRegression` poses a different prior over :math:`w`, by dropping the
-assumption of the Gaussian being spherical.
-
-Instead, the distribution over :math:`w` is assumed to be an axis-parallel,
-elliptical Gaussian distribution.
-
-This means each coefficient :math:`w_{i}` is drawn from a Gaussian distribution,
-centered on zero and with a precision :math:`\lambda_{i}`:
+The Automatic Relevance Determination (as implemented in
+:class:`ARDRegression`) is a kind of linear model that is very similar to the
+`Bayesian Ridge Regression`_, but that leads to sparser coefficients :math:`w`
+[1]_ [2]_.
+
+:class:`ARDRegression` poses a different prior over :math:`w`: it drops
+the spherical Gaussian distribution for a centered elliptic Gaussian
+distribution. This means each coefficient :math:`w_{i}` can itself be drawn from
+a Gaussian distribution, centered on zero and with a precision
+:math:`\lambda_{i}`:
 
 .. math:: p(w|\lambda) = \mathcal{N}(w|0,A^{-1})
 
-with :math:`\text{diag}(A) = \lambda = \{\lambda_{1},...,\lambda_{p}\}`.
-
-In contrast to `Bayesian Ridge Regression`_, each coordinate of :math:`w_{i}`
-has its own standard deviation :math:`\lambda_i`. The prior over all
-:math:`\lambda_i` is chosen to be the same gamma distribution given by
-hyperparameters :math:`\lambda_1` and :math:`\lambda_2`.
+with :math:`A` being a positive definite diagonal matrix and
+:math:`\text{diag}(A) = \lambda = \{\lambda_{1},...,\lambda_{p}\}`.
 
-.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_ard_001.png
-   :target: ../auto_examples/linear_model/plot_ard.html
-   :align: center
-   :scale: 50%
-
-ARD is also known in the literature as *Sparse Bayesian Learning* and
-*Relevance Vector Machine* [3]_ [4]_.
+In contrast to the `Bayesian Ridge Regression`_, each coordinate of
+:math:`w_{i}` has its own standard deviation :math:`\lambda_i^{-\frac{1}{2}}`. The
+prior over all :math:`\lambda_i` is chosen to be the same gamma distribution
+given by the hyperparameters :math:`\lambda_1` and :math:`\lambda_2`.
+
+ARD is also known in the literature as *Sparse Bayesian Learning* and *Relevance
+Vector Machine* [3]_ [4]_. For a worked-out comparison between ARD and `Bayesian
+Ridge Regression`_, see the example below.
 
 .. topic:: Examples:
 
@@ -922,7 +995,9 @@ to warm-starting (see :term:`Glossary <w
 
     .. [6] Mark Schmidt, Nicolas Le Roux, and Francis Bach: `Minimizing Finite Sums with the Stochastic Average Gradient. <https://hal.inria.fr/hal-00860051/document>`_
 
-    .. [7] Aaron Defazio, Francis Bach, Simon Lacoste-Julien: `SAGA: A Fast Incremental Gradient Method With Support for Non-Strongly Convex Composite Objectives. <https://arxiv.org/abs/1407.0202>`_
+    .. [7] Aaron Defazio, Francis Bach, Simon Lacoste-Julien:
+        :arxiv:`SAGA: A Fast Incremental Gradient Method With Support for
+        Non-Strongly Convex Composite Objectives. <1407.0202>`
 
     .. [8] https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm
 
@@ -947,7 +1022,7 @@ reproductive exponential dispersion mode
 
 The minimization problem becomes:
 
-.. math::    \min_{w} \frac{1}{2 n_{\text{samples}}} \sum_i d(y_i, \hat{y}_i) + \frac{\alpha}{2} ||w||_2,
+.. math::    \min_{w} \frac{1}{2 n_{\text{samples}}} \sum_i d(y_i, \hat{y}_i) + \frac{\alpha}{2} ||w||_2^2,
 
 where :math:`\alpha` is the L2 regularization penalty. When sample weights are
 provided, the average becomes a weighted average.
@@ -1253,8 +1328,8 @@ Each iteration performs the following st
    whether the estimated model is valid (see ``is_model_valid``).
 3. Classify all data as inliers or outliers by calculating the residuals
    to the estimated model (``base_estimator.predict(X) - y``) - all data
-   samples with absolute residuals smaller than the ``residual_threshold``
-   are considered as inliers.
+   samples with absolute residuals smaller than or equal to the
+   ``residual_threshold`` are considered as inliers.
 4. Save fitted model as best model if number of inlier samples is
    maximal. In case the current estimated model has the same number of
    inliers, it is only considered as the best model if it has better score.
@@ -1281,7 +1356,7 @@ performance.
  * https://en.wikipedia.org/wiki/RANSAC
  * `"Random Sample Consensus: A Paradigm for Model Fitting with Applications to
    Image Analysis and Automated Cartography"
-   <https://www.sri.com/sites/default/files/publications/ransac-publication.pdf>`_
+   <https://www.cs.ait.ac.th/~mdailey/cvreadings/Fischler-RANSAC.pdf>`_
    Martin A. Fischler and Robert C. Bolles - SRI International (1981)
  * `"Performance Evaluation of RANSAC Family"
    <http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf>`_
@@ -1411,6 +1486,83 @@ Note that this estimator is different fr
 squares implementation with weights given to each sample on the basis of how much the residual is
 greater than a certain threshold.
 
+.. _quantile_regression:
+
+Quantile Regression
+===================
+
+Quantile regression estimates the median or other quantiles of :math:`y`
+conditional on :math:`X`, while ordinary least squares (OLS) estimates the
+conditional mean.
+
+As a linear model, the :class:`QuantileRegressor` gives linear predictions
+:math:`\hat{y}(w, X) = Xw` for the :math:`q`-th quantile, :math:`q \in (0, 1)`.
+The weights or coefficients :math:`w` are then found by the following
+minimization problem:
+
+.. math::
+    \min_{w} {\frac{1}{n_{\text{samples}}}
+    \sum_i PB_q(y_i - X_i w) + \alpha ||w||_1}.
+
+This consists of the pinball loss (also known as linear loss),
+see also :func:`~sklearn.metrics.mean_pinball_loss`,
+
+.. math::
+    PB_q(t) = q \max(t, 0) + (1 - q) \max(-t, 0) =
+    \begin{cases}
+        q t, & t > 0, \\
+        0,    & t = 0, \\
+        -(1-q) t, & t < 0
+    \end{cases}
+
+and the L1 penalty controlled by parameter ``alpha``, similar to
+:class:`Lasso`.
+
+As the pinball loss is only linear in the residuals, quantile regression is
+much more robust to outliers than squared error based estimation of the mean.
+Somewhat in between is the :class:`HuberRegressor`.
+
+Quantile regression may be useful if one is interested in predicting an
+interval instead of point prediction. Sometimes, prediction intervals are
+calculated based on the assumption that prediction error is distributed
+normally with zero mean and constant variance. Quantile regression provides
+sensible prediction intervals even for errors with non-constant (but
+predictable) variance or non-normal distribution.
+
+.. figure:: /auto_examples/linear_model/images/sphx_glr_plot_quantile_regression_002.png
+   :target: ../auto_examples/linear_model/plot_quantile_regression.html
+   :align: center
+   :scale: 50%
+
+Based on minimizing the pinball loss, conditional quantiles can also be
+estimated by models other than linear models. For example,
+:class:`~sklearn.ensemble.GradientBoostingRegressor` can predict conditional
+quantiles if its parameter ``loss`` is set to ``"quantile"`` and parameter
+``alpha`` is set to the quantile that should be predicted. See the example in
+:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`.
+
+Most implementations of quantile regression are based on the linear
+programming problem. The current implementation is based on
+:func:`scipy.optimize.linprog`.
+
+.. topic:: Examples:
+
+  * :ref:`sphx_glr_auto_examples_linear_model_plot_quantile_regression.py`
+
+.. topic:: References:
+
+  * Koenker, R., & Bassett Jr, G. (1978). `Regression quantiles.
+    <https://gib.people.uic.edu/RQ.pdf>`_
+    Econometrica: journal of the Econometric Society, 33-50.
+
+  * Portnoy, S., & Koenker, R. (1997). :doi:`The Gaussian hare and the Laplacian
+    tortoise: computability of squared-error versus absolute-error estimators.
+    Statistical Science, 12, 279-300 <10.1214/ss/1030037960>`.
+
+  * Koenker, R. (2005). :doi:`Quantile Regression <10.1017/CBO9780511754098>`.
+    Cambridge University Press.
+
+
 .. _polynomial_regression:
 
 Polynomial regression: extending linear models with basis functions
diff -pruN 0.23.2-5/doc/modules/manifold.rst 1.1.1-1/doc/modules/manifold.rst
--- 0.23.2-5/doc/modules/manifold.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/manifold.rst	2022-05-19 12:16:26.452782900 +0000
@@ -23,7 +23,26 @@ Manifold learning
 .. figure:: ../auto_examples/manifold/images/sphx_glr_plot_compare_methods_001.png
    :target: ../auto_examples/manifold/plot_compare_methods.html
    :align: center
-   :scale: 60
+   :scale: 70%
+
+.. |manifold_img3| image:: ../auto_examples/manifold/images/sphx_glr_plot_compare_methods_003.png
+  :target: ../auto_examples/manifold/plot_compare_methods.html
+  :scale: 60%
+
+.. |manifold_img4| image:: ../auto_examples/manifold/images/sphx_glr_plot_compare_methods_004.png
+    :target: ../auto_examples/manifold/plot_compare_methods.html
+    :scale: 60%
+
+.. |manifold_img5| image:: ../auto_examples/manifold/images/sphx_glr_plot_compare_methods_005.png
+    :target: ../auto_examples/manifold/plot_compare_methods.html
+    :scale: 60%
+
+.. |manifold_img6| image:: ../auto_examples/manifold/images/sphx_glr_plot_compare_methods_006.png
+    :target: ../auto_examples/manifold/plot_compare_methods.html
+    :scale: 60%
+
+.. centered:: |manifold_img3| |manifold_img4| |manifold_img5| |manifold_img6|
+
 
 Manifold learning is an approach to non-linear dimensionality reduction.
 Algorithms for this task are based on the idea that the dimensionality of
@@ -116,7 +135,7 @@ Complexity
 The Isomap algorithm comprises three stages:
 
 1. **Nearest neighbor search.**  Isomap uses
-   :class:`sklearn.neighbors.BallTree` for efficient neighbor search.
+   :class:`~sklearn.neighbors.BallTree` for efficient neighbor search.
    The cost is approximately :math:`O[D \log(k) N \log(N)]`, for :math:`k`
    nearest neighbors of :math:`N` points in :math:`D` dimensions.
 
@@ -387,9 +406,9 @@ The overall complexity of standard LTSA
 
 .. topic:: References:
 
-   * `"Principal manifolds and nonlinear dimensionality reduction via
+   * :arxiv:`"Principal manifolds and nonlinear dimensionality reduction via
      tangent space alignment"
-     <http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.3693>`_
+     <cs/0212008>`
      Zhang, Z. & Zha, H. Journal of Shanghai Univ. 8:406 (2004)
 
 .. _multidimensional_scaling:
@@ -555,7 +574,10 @@ between natural clusters in the data. If
 divergence could increase during this phase. Usually it does not have to be
 tuned. A critical parameter is the learning rate. If it is too low gradient
 descent will get stuck in a bad local minimum. If it is too high the KL
-divergence will increase during optimization. More tips can be found in
+divergence will increase during optimization. A heuristic suggested in
+Belkina et al. (2019) is to set the learning rate to the sample size
+divided by the early exaggeration factor. We implement this heuristic
+as `learning_rate='auto'` argument. More tips can be found in
 Laurens van der Maaten's FAQ (see references). The last parameter, angle,
 is a tradeoff between performance and accuracy. Larger angles imply that we
 can approximate larger regions by a single point, leading to better speed
@@ -579,7 +601,7 @@ Barnes-Hut method improves on the exact
   or less. The 2D case is typical when building visualizations.
 * Barnes-Hut only works with dense input data. Sparse data matrices can only be
   embedded with the exact method or can be approximated by a dense low rank
-  projection for instance using :class:`sklearn.decomposition.TruncatedSVD`
+  projection for instance using :class:`~sklearn.decomposition.TruncatedSVD`
 * Barnes-Hut is an approximation of the exact method. The approximation is
   parameterized with the angle parameter, therefore the angle parameter is
   unused when method="exact"
@@ -599,7 +621,7 @@ be well separated by non linear methods
 an SVM with a Gaussian RBF kernel). However, failing to visualize well
 separated homogeneously labeled groups with t-SNE in 2D does not necessarily
 imply that the data cannot be correctly classified by a supervised model. It
-might be the case that 2 dimensions are not low enough to accurately represents
+might be the case that 2 dimensions are not high enough to accurately represent
 the internal structure of the data.
 
 
@@ -614,9 +636,15 @@ the internal structure of the data.
     <https://lvdmaaten.github.io/tsne/>`_
     van der Maaten, L.J.P.
 
-  * `"Accelerating t-SNE using Tree-Based Algorithms."
+  * `"Accelerating t-SNE using Tree-Based Algorithms"
     <https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf>`_
-    L.J.P. van der Maaten.  Journal of Machine Learning Research 15(Oct):3221-3245, 2014.
+    van der Maaten, L.J.P.; Journal of Machine Learning Research 15(Oct):3221-3245, 2014.
+
+  * `"Automated optimized parameters for T-distributed stochastic neighbor
+    embedding improve visualization and analysis of large datasets"
+    <https://www.nature.com/articles/s41467-019-13055-y>`_
+    Belkina, A.C., Ciccolella, C.O., Anno, R., Halpert, R., Spidlen, J.,
+    Snyder-Cappione, J.E., Nature Communications 10, 5415 (2019).
 
 Tips on practical use
 =====================
diff -pruN 0.23.2-5/doc/modules/metrics.rst 1.1.1-1/doc/modules/metrics.rst
--- 0.23.2-5/doc/modules/metrics.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/metrics.rst	2022-05-19 12:16:26.452782900 +0000
@@ -186,7 +186,7 @@ Chi-squared kernel
 The chi-squared kernel is a very popular choice for training non-linear SVMs in
 computer vision applications.
 It can be computed using :func:`chi2_kernel` and then passed to an
-:class:`sklearn.svm.SVC` with ``kernel="precomputed"``::
+:class:`~sklearn.svm.SVC` with ``kernel="precomputed"``::
 
     >>> from sklearn.svm import SVC
     >>> from sklearn.metrics.pairwise import chi2_kernel
diff -pruN 0.23.2-5/doc/modules/mixture.rst 1.1.1-1/doc/modules/mixture.rst
--- 0.23.2-5/doc/modules/mixture.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/mixture.rst	2022-05-19 12:16:26.452782900 +0000
@@ -135,6 +135,43 @@ parameters to maximize the likelihood of
 assignments. Repeating this process is guaranteed to always converge
 to a local optimum.
 
+Choice of the Initialization Method
+-----------------------------------
+
+There is a choice of four initialization methods (as well as inputting user defined
+initial means) to generate the initial centers for the model components:
+
+k-means (default)
+  This applies a traditional k-means clustering algorithm.
+  This can be computationally expensive compared to other initialization methods.
+
+k-means++
+  This uses the initialization method of k-means clustering: k-means++.
+  This will pick the first center at random from the data. Subsequent centers will be
+  chosen from a weighted distribution of the data favouring points further away from
+  existing centers. k-means++ is the default initialization for k-means so will be
+  quicker than running a full k-means but can still take a significant amount of
+  time for large data sets with many components.
+
+random_from_data
+  This will pick random data points from the input data as the initial
+  centers. This is a very fast method of initialization but can produce non-convergent
+  results if the chosen points are too close to each other.
+
+random
+  Centers are chosen as a small perturbation away from the mean of all data.
+  This method is simple but can lead to the model taking longer to converge.
+
+.. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_init_001.png
+   :target: ../auto_examples/mixture/plot_gmm_init.html
+   :align: center
+   :scale: 50%
+
+.. topic:: Examples:
+
+    * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_init.py` for an example of
+      using different initializations in Gaussian Mixture.
+
 .. _bgmm:
 
 Variational Bayesian Gaussian Mixture
diff -pruN 0.23.2-5/doc/modules/model_evaluation.rst 1.1.1-1/doc/modules/model_evaluation.rst
--- 0.23.2-5/doc/modules/model_evaluation.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/model_evaluation.rst	2022-05-19 12:16:26.452782900 +0000
@@ -19,7 +19,7 @@ predictions:
   :class:`model_selection.GridSearchCV`) rely on an internal *scoring* strategy.
   This is discussed in the section :ref:`scoring_parameter`.
 
-* **Metric functions**: The :mod:`metrics` module implements functions
+* **Metric functions**: The :mod:`sklearn.metrics` module implements functions
   assessing prediction error for specific purposes. These metrics are detailed
   in sections on :ref:`classification_metrics`,
   :ref:`multilabel_ranking_metrics`, :ref:`regression_metrics` and
@@ -54,51 +54,57 @@ the model and the data, like :func:`metr
 available as neg_mean_squared_error which return the negated value
 of the metric.
 
-==============================    =============================================     ==================================
-Scoring                           Function                                          Comment
-==============================    =============================================     ==================================
+====================================   ==============================================     ==================================
+Scoring                                Function                                           Comment
+====================================   ==============================================     ==================================
 **Classification**
-'accuracy'                        :func:`metrics.accuracy_score`
-'balanced_accuracy'               :func:`metrics.balanced_accuracy_score`
-'average_precision'               :func:`metrics.average_precision_score`
-'neg_brier_score'                 :func:`metrics.brier_score_loss`
-'f1'                              :func:`metrics.f1_score`                          for binary targets
-'f1_micro'                        :func:`metrics.f1_score`                          micro-averaged
-'f1_macro'                        :func:`metrics.f1_score`                          macro-averaged
-'f1_weighted'                     :func:`metrics.f1_score`                          weighted average
-'f1_samples'                      :func:`metrics.f1_score`                          by multilabel sample
-'neg_log_loss'                    :func:`metrics.log_loss`                          requires ``predict_proba`` support
-'precision' etc.                  :func:`metrics.precision_score`                   suffixes apply as with 'f1'
-'recall' etc.                     :func:`metrics.recall_score`                      suffixes apply as with 'f1'
-'jaccard' etc.                    :func:`metrics.jaccard_score`                     suffixes apply as with 'f1'
-'roc_auc'                         :func:`metrics.roc_auc_score`
-'roc_auc_ovr'                     :func:`metrics.roc_auc_score`
-'roc_auc_ovo'                     :func:`metrics.roc_auc_score`
-'roc_auc_ovr_weighted'            :func:`metrics.roc_auc_score`
-'roc_auc_ovo_weighted'            :func:`metrics.roc_auc_score`
+'accuracy'                             :func:`metrics.accuracy_score`
+'balanced_accuracy'                    :func:`metrics.balanced_accuracy_score`
+'top_k_accuracy'                       :func:`metrics.top_k_accuracy_score`
+'average_precision'                    :func:`metrics.average_precision_score`
+'neg_brier_score'                      :func:`metrics.brier_score_loss`
+'f1'                                   :func:`metrics.f1_score`                           for binary targets
+'f1_micro'                             :func:`metrics.f1_score`                           micro-averaged
+'f1_macro'                             :func:`metrics.f1_score`                           macro-averaged
+'f1_weighted'                          :func:`metrics.f1_score`                           weighted average
+'f1_samples'                           :func:`metrics.f1_score`                           by multilabel sample
+'neg_log_loss'                         :func:`metrics.log_loss`                           requires ``predict_proba`` support
+'precision' etc.                       :func:`metrics.precision_score`                    suffixes apply as with 'f1'
+'recall' etc.                          :func:`metrics.recall_score`                       suffixes apply as with 'f1'
+'jaccard' etc.                         :func:`metrics.jaccard_score`                      suffixes apply as with 'f1'
+'roc_auc'                              :func:`metrics.roc_auc_score`
+'roc_auc_ovr'                          :func:`metrics.roc_auc_score`
+'roc_auc_ovo'                          :func:`metrics.roc_auc_score`
+'roc_auc_ovr_weighted'                 :func:`metrics.roc_auc_score`
+'roc_auc_ovo_weighted'                 :func:`metrics.roc_auc_score`
 
 **Clustering**
-'adjusted_mutual_info_score'      :func:`metrics.adjusted_mutual_info_score`
-'adjusted_rand_score'             :func:`metrics.adjusted_rand_score`
-'completeness_score'              :func:`metrics.completeness_score`
-'fowlkes_mallows_score'           :func:`metrics.fowlkes_mallows_score`
-'homogeneity_score'               :func:`metrics.homogeneity_score`
-'mutual_info_score'               :func:`metrics.mutual_info_score`
-'normalized_mutual_info_score'    :func:`metrics.normalized_mutual_info_score`
-'v_measure_score'                 :func:`metrics.v_measure_score`
+'adjusted_mutual_info_score'           :func:`metrics.adjusted_mutual_info_score`
+'adjusted_rand_score'                  :func:`metrics.adjusted_rand_score`
+'completeness_score'                   :func:`metrics.completeness_score`
+'fowlkes_mallows_score'                :func:`metrics.fowlkes_mallows_score`
+'homogeneity_score'                    :func:`metrics.homogeneity_score`
+'mutual_info_score'                    :func:`metrics.mutual_info_score`
+'normalized_mutual_info_score'         :func:`metrics.normalized_mutual_info_score`
+'rand_score'                           :func:`metrics.rand_score`
+'v_measure_score'                      :func:`metrics.v_measure_score`
 
 **Regression**
-'explained_variance'              :func:`metrics.explained_variance_score`
-'max_error'                       :func:`metrics.max_error`
-'neg_mean_absolute_error'         :func:`metrics.mean_absolute_error`
-'neg_mean_squared_error'          :func:`metrics.mean_squared_error`
-'neg_root_mean_squared_error'     :func:`metrics.mean_squared_error`
-'neg_mean_squared_log_error'      :func:`metrics.mean_squared_log_error`
-'neg_median_absolute_error'       :func:`metrics.median_absolute_error`
-'r2'                              :func:`metrics.r2_score`
-'neg_mean_poisson_deviance'       :func:`metrics.mean_poisson_deviance`
-'neg_mean_gamma_deviance'         :func:`metrics.mean_gamma_deviance`
-==============================    =============================================     ==================================
+'explained_variance'                   :func:`metrics.explained_variance_score`
+'max_error'                            :func:`metrics.max_error`
+'neg_mean_absolute_error'              :func:`metrics.mean_absolute_error`
+'neg_mean_squared_error'               :func:`metrics.mean_squared_error`
+'neg_root_mean_squared_error'          :func:`metrics.mean_squared_error`
+'neg_mean_squared_log_error'           :func:`metrics.mean_squared_log_error`
+'neg_median_absolute_error'            :func:`metrics.median_absolute_error`
+'r2'                                   :func:`metrics.r2_score`
+'neg_mean_poisson_deviance'            :func:`metrics.mean_poisson_deviance`
+'neg_mean_gamma_deviance'              :func:`metrics.mean_gamma_deviance`
+'neg_mean_absolute_percentage_error'   :func:`metrics.mean_absolute_percentage_error`
+'d2_absolute_error_score'              :func:`metrics.d2_absolute_error_score`
+'d2_pinball_score'                     :func:`metrics.d2_pinball_score`
+'d2_tweedie_score'                     :func:`metrics.d2_tweedie_score`
+====================================   ==============================================     ==================================
 
 
 Usage examples:
@@ -112,14 +118,15 @@ Usage examples:
     >>> model = svm.SVC()
     >>> cross_val_score(model, X, y, cv=5, scoring='wrong_choice')
     Traceback (most recent call last):
-    ValueError: 'wrong_choice' is not a valid scoring value. Use sorted(sklearn.metrics.SCORERS.keys()) to get valid options.
+    ValueError: 'wrong_choice' is not a valid scoring value. Use
+    sklearn.metrics.get_scorer_names() to get valid options.
 
 .. note::
 
-    The values listed by the ValueError exception correspond to the functions measuring
-    prediction accuracy described in the following sections.
-    The scorer objects for those functions are stored in the dictionary
-    ``sklearn.metrics.SCORERS``.
+    The values listed by the ``ValueError`` exception correspond to the
+    functions measuring prediction accuracy described in the following
+    sections. You can retrieve the names of all available scorers by calling
+    :func:`~sklearn.metrics.get_scorer_names`.
 
 .. currentmodule:: sklearn.metrics
 
@@ -137,7 +144,7 @@ measuring a prediction error given groun
 - functions ending with ``_error`` or ``_loss`` return a
   value to minimize, the lower the better.  When converting
   into a scorer object using :func:`make_scorer`, set
-  the ``greater_is_better`` parameter to False (True by default; see the
+  the ``greater_is_better`` parameter to ``False`` (``True`` by default; see the
   parameter description below).
 
 Metrics available for various machine learning tasks are detailed in sections
@@ -196,7 +203,7 @@ Here is an example of building custom sc
     >>> from sklearn.dummy import DummyClassifier
     >>> clf = DummyClassifier(strategy='most_frequent', random_state=0)
     >>> clf = clf.fit(X, y)
-    >>> my_custom_loss_func(clf.predict(X), y)
+    >>> my_custom_loss_func(y, clf.predict(X))
     0.69...
     >>> score(clf, X, y)
     -0.69...
@@ -248,7 +255,7 @@ Using multiple metric evaluation
 Scikit-learn also permits evaluation of multiple metrics in ``GridSearchCV``,
 ``RandomizedSearchCV`` and ``cross_validate``.
 
-There are two ways to specify multiple scoring metrics for the ``scoring``
+There are three ways to specify multiple scoring metrics for the ``scoring``
 parameter:
 
 - As an iterable of string metrics::
@@ -260,25 +267,23 @@ parameter:
       >>> scoring = {'accuracy': make_scorer(accuracy_score),
       ...            'prec': 'precision'}
 
-Note that the dict values can either be scorer functions or one of the
-predefined metric strings.
+  Note that the dict values can either be scorer functions or one of the
+  predefined metric strings.
 
-Currently only those scorer functions that return a single score can be passed
-inside the dict. Scorer functions that return multiple values are not
-permitted and will require a wrapper to return a single metric::
+- As a callable that returns a dictionary of scores::
 
     >>> from sklearn.model_selection import cross_validate
     >>> from sklearn.metrics import confusion_matrix
     >>> # A sample toy binary classification dataset
     >>> X, y = datasets.make_classification(n_classes=2, random_state=0)
     >>> svm = LinearSVC(random_state=0)
-    >>> def tn(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 0]
-    >>> def fp(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 1]
-    >>> def fn(y_true, y_pred): return confusion_matrix(y_true, y_pred)[1, 0]
-    >>> def tp(y_true, y_pred): return confusion_matrix(y_true, y_pred)[1, 1]
-    >>> scoring = {'tp': make_scorer(tp), 'tn': make_scorer(tn),
-    ...            'fp': make_scorer(fp), 'fn': make_scorer(fn)}
-    >>> cv_results = cross_validate(svm.fit(X, y), X, y, cv=5, scoring=scoring)
+    >>> def confusion_matrix_scorer(clf, X, y):
+    ...      y_pred = clf.predict(X)
+    ...      cm = confusion_matrix(y, y_pred)
+    ...      return {'tn': cm[0, 0], 'fp': cm[0, 1],
+    ...              'fn': cm[1, 0], 'tp': cm[1, 1]}
+    >>> cv_results = cross_validate(svm, X, y, cv=5,
+    ...                             scoring=confusion_matrix_scorer)
     >>> # Getting the test set true positive scores
     >>> print(cv_results['test_tp'])
     [10  9  8  7  8]
@@ -303,16 +308,15 @@ to the overall score, through the ``samp
 Some of these are restricted to the binary classification case:
 
 .. autosummary::
-   :template: function.rst
 
    precision_recall_curve
    roc_curve
+   det_curve
 
 
 Others also work in the multiclass case:
 
 .. autosummary::
-   :template: function.rst
 
    balanced_accuracy_score
    cohen_kappa_score
@@ -320,12 +324,12 @@ Others also work in the multiclass case:
    hinge_loss
    matthews_corrcoef
    roc_auc_score
+   top_k_accuracy_score
 
 
 Some also work in the multilabel case:
 
 .. autosummary::
-   :template: function.rst
 
    accuracy_score
    classification_report
@@ -344,7 +348,6 @@ Some also work in the multilabel case:
 And some work with binary and multilabel (but not multiclass) problems:
 
 .. autosummary::
-   :template: function.rst
 
    average_precision_score
 
@@ -417,7 +420,7 @@ defined as
 
 .. math::
 
-   \texttt{accuracy}(y, \hat{y}) = \frac{1}{n_\text{samples}} \sum_{i=0}^{n_\text{samples}-1} 1(\hat{y}_i = y_i)
+  \texttt{accuracy}(y, \hat{y}) = \frac{1}{n_\text{samples}} \sum_{i=0}^{n_\text{samples}-1} 1(\hat{y}_i = y_i)
 
 where :math:`1(x)` is the `indicator function
 <https://en.wikipedia.org/wiki/Indicator_function>`_.
@@ -431,17 +434,55 @@ where :math:`1(x)` is the `indicator fun
   >>> accuracy_score(y_true, y_pred, normalize=False)
   2
 
-In the multilabel case with binary label indicators: ::
+In the multilabel case with binary label indicators::
 
   >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
   0.5
 
 .. topic:: Example:
 
-  * See :ref:`sphx_glr_auto_examples_feature_selection_plot_permutation_test_for_classification.py`
+  * See :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py`
     for an example of accuracy score usage using permutations of
     the dataset.
 
+.. _top_k_accuracy_score:
+
+Top-k accuracy score
+--------------------
+
+The :func:`top_k_accuracy_score` function is a generalization of
+:func:`accuracy_score`. The difference is that a prediction is considered
+correct as long as the true label is associated with one of the ``k`` highest
+predicted scores. :func:`accuracy_score` is the special case of `k = 1`.
+
+The function covers the binary and multiclass classification cases but not the
+multilabel case.
+
+If :math:`\hat{f}_{i,j}` is the predicted class for the :math:`i`-th sample
+corresponding to the :math:`j`-th largest predicted score and :math:`y_i` is the
+corresponding true value, then the fraction of correct predictions over
+:math:`n_\text{samples}` is defined as
+
+.. math::
+
+   \texttt{top-k accuracy}(y, \hat{f}) = \frac{1}{n_\text{samples}} \sum_{i=0}^{n_\text{samples}-1} \sum_{j=1}^{k} 1(\hat{f}_{i,j} = y_i)
+
+where :math:`k` is the number of guesses allowed and :math:`1(x)` is the
+`indicator function <https://en.wikipedia.org/wiki/Indicator_function>`_.
+
+  >>> import numpy as np
+  >>> from sklearn.metrics import top_k_accuracy_score
+  >>> y_true = np.array([0, 1, 2, 2])
+  >>> y_score = np.array([[0.5, 0.2, 0.2],
+  ...                     [0.3, 0.4, 0.2],
+  ...                     [0.2, 0.4, 0.3],
+  ...                     [0.7, 0.2, 0.1]])
+  >>> top_k_accuracy_score(y_true, y_score, k=2)
+  0.75
+  >>> # Not normalizing gives the number of "correctly" classified samples
+  >>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False)
+  3
+
 .. _balanced_accuracy_score:
 
 Balanced accuracy score
@@ -526,8 +567,9 @@ or *informedness*.
      Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples,
      and Case Studies <https://mitpress.mit.edu/books/fundamentals-machine-learning-predictive-data-analytics>`_,
      2015.
-  .. [Urbanowicz2015] Urbanowicz R.J.,  Moore, J.H. `ExSTraCS 2.0: description and evaluation of a scalable learning
-     classifier system <https://doi.org/10.1007/s12065-015-0128-8>`_, Evol. Intel. (2015) 8: 89.
+  .. [Urbanowicz2015] Urbanowicz R.J.,  Moore, J.H. :doi:`ExSTraCS 2.0: description
+      and evaluation of a scalable learning classifier
+      system <10.1007/s12065-015-0128-8>`, Evol. Intel. (2015) 8: 89.
 
 .. _cohen_kappa:
 
@@ -576,7 +618,7 @@ predicted to be in group :math:`j`. Here
          [0, 0, 1],
          [1, 0, 2]])
 
-:func:`plot_confusion_matrix` can be used to visually represent a confusion
+:class:`ConfusionMatrixDisplay` can be used to visually represent a confusion
 matrix as shown in the
 :ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py`
 example, which creates the following figure:
@@ -686,7 +728,7 @@ where :math:`1(x)` is the `indicator fun
   >>> hamming_loss(y_true, y_pred)
   0.25
 
-In the multilabel case with binary label indicators: ::
+In the multilabel case with binary label indicators::
 
   >>> hamming_loss(np.array([[0, 1], [1, 1]]), np.zeros((2, 2)))
   0.75
@@ -748,7 +790,6 @@ Several functions allow you to analyze t
 score:
 
 .. autosummary::
-   :template: function.rst
 
    average_precision_score
    f1_score
@@ -760,9 +801,10 @@ score:
 
 Note that the :func:`precision_recall_curve` function is restricted to the
 binary case. The :func:`average_precision_score` function works only in
-binary classification and multilabel indicator format. The
-:func:`plot_precision_recall_curve` function plots the precision recall as
-follows.
+binary classification and multilabel indicator format.
+The :func:`PrecisionRecallDisplay.from_estimator` and
+:func:`PrecisionRecallDisplay.from_predictions` functions will plot the
+precision-recall curve as follows.
 
 .. image:: ../auto_examples/model_selection/images/sphx_glr_plot_precision_recall_001.png
         :target: ../auto_examples/model_selection/plot_precision_recall.html#plot-the-precision-recall-curve
@@ -794,7 +836,7 @@ follows.
      <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.157.5766&rep=rep1&type=pdf>`_,
      IJCV 2010.
   .. [Davis2006] J. Davis, M. Goadrich, `The Relationship Between Precision-Recall and ROC Curves
-     <http://www.machinelearning.org/proceedings/icml2006/030_The_Relationship_Bet.pdf>`_,
+     <https://www.biostat.wisc.edu/~page/rocpr.pdf>`_,
      ICML 2006.
   .. [Flach2015] P.A. Flach, M. Kull, `Precision-Recall-Gain Curves: PR Analysis Done Right
      <https://papers.nips.cc/paper/5867-precision-recall-gain-curves-pr-analysis-done-right.pdf>`_,
@@ -862,11 +904,11 @@ Here are some small examples in binary c
   >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
   >>> precision, recall, threshold = precision_recall_curve(y_true, y_scores)
   >>> precision
-  array([0.66..., 0.5       , 1.        , 1.        ])
+  array([0.5       , 0.66..., 0.5       , 1.        , 1.        ])
   >>> recall
-  array([1. , 0.5, 0.5, 0. ])
+  array([1. , 1. , 0.5, 0.5, 0. ])
   >>> threshold
-  array([0.35, 0.4 , 0.8 ])
+  array([0.1 , 0.35, 0.4 , 0.8 ])
   >>> average_precision_score(y_true, y_scores)
   0.83...
 
@@ -874,7 +916,7 @@ Here are some small examples in binary c
 
 Multiclass and multilabel classification
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-In multiclass and multilabel classification task, the notions of precision,
+In a multiclass and multilabel classification task, the notions of precision,
 recall, and F-measures can be applied to each label independently.
 There are a few ways to combine results across labels,
 specified by the ``average`` argument to the
@@ -888,8 +930,8 @@ produce an F-score that is not between p
 
 To make this more explicit, consider the following notation:
 
-* :math:`y` the set of *predicted* :math:`(sample, label)` pairs
-* :math:`\hat{y}` the set of *true* :math:`(sample, label)` pairs
+* :math:`y` the set of *true* :math:`(sample, label)` pairs
+* :math:`\hat{y}` the set of *predicted* :math:`(sample, label)` pairs
 * :math:`L` the set of labels
 * :math:`S` the set of samples
 * :math:`y_s` the subset of :math:`y` with sample :math:`s`,
@@ -897,10 +939,10 @@ To make this more explicit, consider the
 * :math:`y_l` the subset of :math:`y` with label :math:`l`
 * similarly, :math:`\hat{y}_s` and :math:`\hat{y}_l` are subsets of
   :math:`\hat{y}`
-* :math:`P(A, B) := \frac{\left| A \cap B \right|}{\left|A\right|}` for some
+* :math:`P(A, B) := \frac{\left| A \cap B \right|}{\left|B\right|}` for some
   sets :math:`A` and :math:`B`
-* :math:`R(A, B) := \frac{\left| A \cap B \right|}{\left|B\right|}`
-  (Conventions vary on handling :math:`B = \emptyset`; this implementation uses
+* :math:`R(A, B) := \frac{\left| A \cap B \right|}{\left|A\right|}`
+  (Conventions vary on handling :math:`A = \emptyset`; this implementation uses
   :math:`R(A, B):=0`, and similar for :math:`P`.)
 * :math:`F_\beta(A, B) := \left(1 + \beta^2\right) \frac{P(A, B) \times R(A, B)}{\beta^2 P(A, B) + R(A, B)}`
 
@@ -915,7 +957,7 @@ Then the metrics are defined as:
 +---------------+------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------+
 |``"macro"``    | :math:`\frac{1}{\left|L\right|} \sum_{l \in L} P(y_l, \hat{y}_l)`                                                | :math:`\frac{1}{\left|L\right|} \sum_{l \in L} R(y_l, \hat{y}_l)`                                                | :math:`\frac{1}{\left|L\right|} \sum_{l \in L} F_\beta(y_l, \hat{y}_l)`                                              |
 +---------------+------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------+
-|``"weighted"`` | :math:`\frac{1}{\sum_{l \in L} \left|\hat{y}_l\right|} \sum_{l \in L} \left|\hat{y}_l\right| P(y_l, \hat{y}_l)`  | :math:`\frac{1}{\sum_{l \in L} \left|\hat{y}_l\right|} \sum_{l \in L} \left|\hat{y}_l\right| R(y_l, \hat{y}_l)`  | :math:`\frac{1}{\sum_{l \in L} \left|\hat{y}_l\right|} \sum_{l \in L} \left|\hat{y}_l\right| F_\beta(y_l, \hat{y}_l)`|
+|``"weighted"`` | :math:`\frac{1}{\sum_{l \in L} \left|y_l\right|} \sum_{l \in L} \left|y_l\right| P(y_l, \hat{y}_l)`              | :math:`\frac{1}{\sum_{l \in L} \left|y_l\right|} \sum_{l \in L} \left|y_l\right| R(y_l, \hat{y}_l)`              | :math:`\frac{1}{\sum_{l \in L} \left|y_l\right|} \sum_{l \in L} \left|y_l\right| F_\beta(y_l, \hat{y}_l)`            |
 +---------------+------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------+
 |``None``       | :math:`\langle P(y_l, \hat{y}_l) | l \in L \rangle`                                                              | :math:`\langle R(y_l, \hat{y}_l) | l \in L \rangle`                                                              | :math:`\langle F_\beta(y_l, \hat{y}_l) | l \in L \rangle`                                                            |
 +---------------+------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------+
@@ -967,7 +1009,7 @@ naively set-wise measure applying native
 apply to multilabel and multiclass through the use of `average` (see
 :ref:`above <average>`).
 
-In the binary case: ::
+In the binary case::
 
   >>> import numpy as np
   >>> from sklearn.metrics import jaccard_score
@@ -978,7 +1020,12 @@ In the binary case: ::
   >>> jaccard_score(y_true[0], y_pred[0])
   0.6666...
 
-In the multilabel case with binary label indicators: ::
+In the 2D comparison case (e.g. image similarity):
+
+  >>> jaccard_score(y_true, y_pred, average="micro")
+  0.6
+
+In the multilabel case with binary label indicators::
 
   >>> jaccard_score(y_true, y_pred, average='samples')
   0.5833...
@@ -988,7 +1035,7 @@ In the multilabel case with binary label
   array([0.5, 0.5, 1. ])
 
 Multiclass problems are binarized and treated like the corresponding
-multilabel problem: ::
+multilabel problem::
 
   >>> y_pred = [0, 2, 1, 2]
   >>> y_true = [0, 1, 2, 2]
@@ -1059,7 +1106,7 @@ with a svm classifier in a multiclass pr
   LinearSVC()
   >>> pred_decision = est.decision_function([[-1], [2], [3]])
   >>> y_true = [0, 2, 3]
-  >>> hinge_loss(y_true, pred_decision, labels)
+  >>> hinge_loss(y_true, pred_decision, labels=labels)
   0.56...
 
 .. _log_loss:
@@ -1089,7 +1136,7 @@ be encoded as a 1-of-K binary indicator
 i.e., :math:`y_{i,k} = 1` if sample :math:`i` has label :math:`k`
 taken from a set of :math:`K` labels.
 Let :math:`P` be a matrix of probability estimates,
-with :math:`p_{i,k} = \operatorname{Pr}(t_{i,k} = 1)`.
+with :math:`p_{i,k} = \operatorname{Pr}(y_{i,k} = 1)`.
 Then the log loss of the whole set is
 
 .. math::
@@ -1331,21 +1378,48 @@ area under the roc curve, the curve info
 For more information see the `Wikipedia article on AUC
 <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>`_.
 
-  >>> import numpy as np
+Compared to metrics such as the subset accuracy, the Hamming loss, or the
+F1 score, ROC doesn't require optimizing a threshold for each label.
+
+.. _roc_auc_binary:
+
+Binary case
+^^^^^^^^^^^
+
+In the **binary case**, you can either provide the probability estimates, using
+the `classifier.predict_proba()` method, or the non-thresholded decision values
+given by the `classifier.decision_function()` method. In the case of providing
+the probability estimates, the probability of the class with the
+"greater label" should be provided. The "greater label" corresponds to
+`classifier.classes_[1]` and thus `classifier.predict_proba(X)[:, 1]`.
+Therefore, the `y_score` parameter is of size (n_samples,).
+
+  >>> from sklearn.datasets import load_breast_cancer
+  >>> from sklearn.linear_model import LogisticRegression
   >>> from sklearn.metrics import roc_auc_score
-  >>> y_true = np.array([0, 0, 1, 1])
-  >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
-  >>> roc_auc_score(y_true, y_scores)
-  0.75
+  >>> X, y = load_breast_cancer(return_X_y=True)
+  >>> clf = LogisticRegression(solver="liblinear").fit(X, y)
+  >>> clf.classes_
+  array([0, 1])
 
-In multi-label classification, the :func:`roc_auc_score` function is
-extended by averaging over the labels as :ref:`above <average>`.
+We can use the probability estimates corresponding to `clf.classes_[1]`.
 
-Compared to metrics such as the subset accuracy, the Hamming loss, or the
-F1 score, ROC doesn't require optimizing a threshold for each label.
+  >>> y_score = clf.predict_proba(X)[:, 1]
+  >>> roc_auc_score(y, y_score)
+  0.99...
+
+Otherwise, we can use the non-thresholded decision values
+
+  >>> roc_auc_score(y, clf.decision_function(X))
+  0.99...
+
+.. _roc_auc_multiclass:
+
+Multi-class case
+^^^^^^^^^^^^^^^^
 
-The :func:`roc_auc_score` function can also be used in multi-class
-classification. Two averaging strategies are currently supported: the
+The :func:`roc_auc_score` function can also be used in **multi-class
+classification**. Two averaging strategies are currently supported: the
 one-vs-one algorithm computes the average of the pairwise ROC AUC scores, and
 the one-vs-rest algorithm computes the average of the ROC AUC scores for each
 class against all other classes. In both cases, the predicted labels are
@@ -1360,7 +1434,7 @@ uniformly:
 
 .. math::
 
-   \frac{2}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c (\text{AUC}(j | k) +
+   \frac{1}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c (\text{AUC}(j | k) +
    \text{AUC}(k | j))
 
 where :math:`c` is the number of classes and :math:`\text{AUC}(j | k)` is the
@@ -1375,7 +1449,7 @@ prevalence:
 
 .. math::
 
-   \frac{2}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c p(j \cup k)(
+   \frac{1}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c p(j \cup k)(
    \text{AUC}(j | k) + \text{AUC}(k | j))
 
 where :math:`c` is the number of classes. This algorithm is used by setting
@@ -1399,6 +1473,34 @@ to the given limit.
    :scale: 75
    :align: center
 
+.. _roc_auc_multilabel:
+
+Multi-label case
+^^^^^^^^^^^^^^^^
+
+In **multi-label classification**, the :func:`roc_auc_score` function is
+extended by averaging over the labels as :ref:`above <average>`. In this case,
+you should provide a `y_score` of shape `(n_samples, n_classes)`. Thus, when
+using the probability estimates, one needs to select the probability of the
+class with the greater label for each output.
+
+  >>> from sklearn.datasets import make_multilabel_classification
+  >>> from sklearn.multioutput import MultiOutputClassifier
+  >>> X, y = make_multilabel_classification(random_state=0)
+  >>> inner_clf = LogisticRegression(solver="liblinear", random_state=0)
+  >>> clf = MultiOutputClassifier(inner_clf).fit(X, y)
+  >>> y_score = np.transpose([y_pred[:, 1] for y_pred in clf.predict_proba(X)])
+  >>> roc_auc_score(y, y_score, average=None)
+  array([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])
+
+And the decision values do not require such processing.
+
+  >>> from sklearn.linear_model import RidgeClassifierCV
+  >>> clf = RidgeClassifierCV().fit(X, y)
+  >>> y_score = clf.decision_function(X)
+  >>> roc_auc_score(y, y_score, average=None)
+  array([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])
+
 .. topic:: Examples:
 
   * See :ref:`sphx_glr_auto_examples_model_selection_plot_roc.py`
@@ -1438,6 +1540,93 @@ to the given limit.
        In Data Mining, 2001.
        Proceedings IEEE International Conference, pp. 131-138.
 
+.. _det_curve:
+
+Detection error tradeoff (DET)
+------------------------------
+
+The function :func:`det_curve` computes the
+detection error tradeoff curve (DET) curve [WikipediaDET2017]_.
+Quoting Wikipedia:
+
+  "A detection error tradeoff (DET) graph is a graphical plot of error rates
+  for binary classification systems, plotting false reject rate vs. false
+  accept rate. The x- and y-axes are scaled non-linearly by their standard
+  normal deviates (or just by logarithmic transformation), yielding tradeoff
+  curves that are more linear than ROC curves, and use most of the image area
+  to highlight the differences of importance in the critical operating region."
+
+DET curves are a variation of receiver operating characteristic (ROC) curves
+where False Negative Rate is plotted on the y-axis instead of True Positive
+Rate.
+DET curves are commonly plotted in normal deviate scale by transformation with
+:math:`\phi^{-1}` (with :math:`\phi` being the cumulative distribution
+function).
+The resulting performance curves explicitly visualize the tradeoff of error
+types for given classification algorithms.
+See [Martin1997]_ for examples and further motivation.
+
+This figure compares the ROC and DET curves of two example classifiers on the
+same classification task:
+
+.. image:: ../auto_examples/model_selection/images/sphx_glr_plot_det_001.png
+   :target: ../auto_examples/model_selection/plot_det.html
+   :scale: 75
+   :align: center
+
+**Properties:**
+
+* DET curves form a linear curve in normal deviate scale if the detection
+  scores are normally (or close-to normally) distributed.
+  It was shown by [Navratil2007]_ that the reverse is not necessarily true and
+  even more general distributions are able to produce linear DET curves.
+
+* The normal deviate scale transformation spreads out the points such that a
+  comparatively larger space of plot is occupied.
+  Therefore curves with similar classification performance might be easier to
+  distinguish on a DET plot.
+
+* With False Negative Rate being "inverse" to True Positive Rate the point
+  of perfection for DET curves is the origin (in contrast to the top left
+  corner for ROC curves).
+
+**Applications and limitations:**
+
+DET curves are intuitive to read and hence allow quick visual assessment of a
+classifier's performance.
+Additionally DET curves can be consulted for threshold analysis and operating
+point selection.
+This is particularly helpful if a comparison of error types is required.
+
+On the other hand DET curves do not provide their metric as a single number.
+Therefore for either automated evaluation or comparison to other
+classification tasks metrics like the derived area under ROC curve might be
+better suited.
+
+.. topic:: Examples:
+
+  * See :ref:`sphx_glr_auto_examples_model_selection_plot_det.py`
+    for an example comparison between receiver operating characteristic (ROC)
+    curves and Detection error tradeoff (DET) curves.
+
+.. topic:: References:
+
+  .. [WikipediaDET2017] Wikipedia contributors. Detection error tradeoff.
+     Wikipedia, The Free Encyclopedia. September 4, 2017, 23:33 UTC.
+     Available at: https://en.wikipedia.org/w/index.php?title=Detection_error_tradeoff&oldid=798982054.
+     Accessed February 19, 2018.
+
+  .. [Martin1997] A. Martin, G. Doddington, T. Kamm, M. Ordowski, and M. Przybocki,
+     `The DET Curve in Assessment of Detection Task Performance
+     <https://ccc.inaoep.mx/~villasen/bib/martin97det.pdf>`_,
+     NIST 1997.
+
+  .. [Navratil2007] J. Navratil and D. Klusacek,
+     "`On Linear DETs,
+     <https://ieeexplore.ieee.org/document/4218079>`_"
+     2007 IEEE International Conference on Acoustics,
+     Speech and Signal Processing - ICASSP '07, Honolulu,
+     HI, 2007, pp. IV-229-IV-232.
 
 .. _zero_one_loss:
 
@@ -1476,7 +1665,7 @@ where :math:`1(x)` is the `indicator fun
   1
 
 In the multilabel case with binary label indicators, where the first label
-set [0,1] has an error: ::
+set [0,1] has an error::
 
   >>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
   0.5
@@ -1497,30 +1686,24 @@ Brier score loss
 
 The :func:`brier_score_loss` function computes the
 `Brier score <https://en.wikipedia.org/wiki/Brier_score>`_
-for binary classes. Quoting Wikipedia:
+for binary classes [Brier1950]_. Quoting Wikipedia:
 
     "The Brier score is a proper score function that measures the accuracy of
     probabilistic predictions. It is applicable to tasks in which predictions
     must assign probabilities to a set of mutually exclusive discrete outcomes."
 
-This function returns a score of the mean square difference between the actual
-outcome and the predicted probability of the possible outcome. The actual
-outcome has to be 1 or 0 (true or false), while the predicted probability of
-the actual outcome can be a value between 0 and 1.
-
-The brier score loss is also between 0 to 1 and the lower the score (the mean
-square difference is smaller), the more accurate the prediction is. It can be
-thought of as a measure of the "calibration" of a set of probabilistic
-predictions.
+This function returns the mean squared error of the actual outcome
+:math:`y \in \{0,1\}` and the predicted probability estimate
+:math:`p = \operatorname{Pr}(y = 1)` (:term:`predict_proba`) as outputted by:
 
 .. math::
 
-   BS = \frac{1}{N} \sum_{t=1}^{N}(f_t - o_t)^2
+   BS = \frac{1}{n_{\text{samples}}} \sum_{i=0}^{n_{\text{samples}} - 1}(y_i - p_i)^2
 
-where : :math:`N` is the total number of predictions, :math:`f_t` is the
-predicted probability of the actual outcome :math:`o_t`.
+The Brier score loss is also between 0 and 1 and the lower the value (the mean
+square difference is smaller), the more accurate the prediction is.
 
-Here is a small example of usage of this function:::
+Here is a small example of usage of this function::
 
     >>> import numpy as np
     >>> from sklearn.metrics import brier_score_loss
@@ -1537,6 +1720,18 @@ Here is a small example of usage of this
     >>> brier_score_loss(y_true, y_prob > 0.5)
     0.0
 
+The Brier score can be used to assess how well a classifier is calibrated.
+However, a lower Brier score loss does not always mean a better calibration.
+This is because, by analogy with the bias-variance decomposition of the mean
+squared error, the Brier score loss can be decomposed as the sum of calibration
+loss and refinement loss [Bella2012]_. Calibration loss is defined as the mean
+squared deviation from empirical probabilities derived from the slope of ROC
+segments. Refinement loss can be defined as the expected optimal loss as
+measured by the area under the optimal cost curve. Refinement loss can change
+independently from calibration loss, thus a lower Brier score loss does not
+necessarily mean a better calibrated model. "Only when refinement loss remains
+the same does a lower Brier score loss always mean better calibration"
+[Bella2012]_, [Flach2008]_.
 
 .. topic:: Example:
 
@@ -1546,10 +1741,21 @@ Here is a small example of usage of this
 
 .. topic:: References:
 
-  * G. Brier, `Verification of forecasts expressed in terms of probability
+  .. [Brier1950] G. Brier, `Verification of forecasts expressed in terms of
+    probability
     <ftp://ftp.library.noaa.gov/docs.lib/htdocs/rescue/mwr/078/mwr-078-01-0001.pdf>`_,
     Monthly weather review 78.1 (1950)
 
+  .. [Bella2012] Bella, Ferri, Hernández-Orallo, and Ramírez-Quintana
+    `"Calibration of Machine Learning Models"
+    <http://dmip.webs.upv.es/papers/BFHRHandbook2010.pdf>`_
+    in Khosrow-Pour, M. "Machine learning: concepts, methodologies, tools
+    and applications." Hershey, PA: Information Science Reference (2012).
+
+  .. [Flach2008] Flach, Peter, and Edson Matsubara. `"On classification, ranking,
+    and probability estimation." <https://drops.dagstuhl.de/opus/volltexte/2008/1382/>`_
+    Dagstuhl Seminar Proceedings. Schloss Dagstuhl-Leibniz-Zentrum für Informatik (2008).
+
 .. _multilabel_ranking_metrics:
 
 Multilabel ranking metrics
@@ -1699,8 +1905,9 @@ Normalized Discounted Cumulative Gain
 -------------------------------------
 
 Discounted Cumulative Gain (DCG) and Normalized Discounted Cumulative Gain
-(NDCG) are ranking metrics; they compare a predicted order to ground-truth
-scores, such as the relevance of answers to a query.
+(NDCG) are ranking metrics implemented in :func:`~sklearn.metrics.dcg_score`
+and :func:`~sklearn.metrics.ndcg_score` ; they compare a predicted order to
+ground-truth scores, such as the relevance of answers to a query.
 
 From the Wikipedia page for Discounted Cumulative Gain:
 
@@ -1764,8 +1971,9 @@ Regression metrics
 The :mod:`sklearn.metrics` module implements several loss, score, and utility
 functions to measure regression performance. Some of those have been enhanced
 to handle the multioutput case: :func:`mean_squared_error`,
-:func:`mean_absolute_error`, :func:`explained_variance_score` and
-:func:`r2_score`.
+:func:`mean_absolute_error`, :func:`r2_score`,
+:func:`explained_variance_score`, :func:`mean_pinball_loss`, :func:`d2_pinball_score`
+and :func:`d2_absolute_error_score`.
 
 
 These functions have an ``multioutput`` keyword argument which specifies the
@@ -1788,71 +1996,87 @@ score puts more importance on well expla
 for backward compatibility. This will be changed to ``uniform_average`` in the
 future.
 
-.. _explained_variance_score:
-
-Explained variance score
--------------------------
-
-The :func:`explained_variance_score` computes the `explained variance
-regression score <https://en.wikipedia.org/wiki/Explained_variation>`_.
-
-If :math:`\hat{y}` is the estimated target output, :math:`y` the corresponding
-(correct) target output, and :math:`Var` is `Variance
-<https://en.wikipedia.org/wiki/Variance>`_, the square of the standard deviation,
-then the explained variance is estimated as follow:
+.. _r2_score:
 
-.. math::
+R² score, the coefficient of determination
+-------------------------------------------
 
-  explained\_{}variance(y, \hat{y}) = 1 - \frac{Var\{ y - \hat{y}\}}{Var\{y\}}
+The :func:`r2_score` function computes the `coefficient of
+determination <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_,
+usually denoted as R².
 
-The best possible score is 1.0, lower values are worse.
+It represents the proportion of variance (of y) that has been explained by the
+independent variables in the model. It provides an indication of goodness of
+fit and therefore a measure of how well unseen samples are likely to be
+predicted by the model, through the proportion of explained variance.
 
-Here is a small example of usage of the :func:`explained_variance_score`
-function::
+As such variance is dataset dependent, R² may not be meaningfully comparable
+across different datasets. Best possible score is 1.0 and it can be negative
+(because the model can be arbitrarily worse). A constant model that always
+predicts the expected (average) value of y, disregarding the input features,
+would get an :math:`R^2` score of 0.0.
 
-    >>> from sklearn.metrics import explained_variance_score
-    >>> y_true = [3, -0.5, 2, 7]
-    >>> y_pred = [2.5, 0.0, 2, 8]
-    >>> explained_variance_score(y_true, y_pred)
-    0.957...
-    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
-    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
-    >>> explained_variance_score(y_true, y_pred, multioutput='raw_values')
-    array([0.967..., 1.        ])
-    >>> explained_variance_score(y_true, y_pred, multioutput=[0.3, 0.7])
-    0.990...
+Note: when the prediction residuals have zero mean, the :math:`R^2` score and
+the :ref:`explained_variance_score` are identical.
 
-.. _max_error:
+If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample
+and :math:`y_i` is the corresponding true value for total :math:`n` samples,
+the estimated R² is defined as:
 
-Max error
--------------------
+.. math::
 
-The :func:`max_error` function computes the maximum `residual error
-<https://en.wikipedia.org/wiki/Errors_and_residuals>`_ , a metric
-that captures the worst case error between the predicted value and
-the true value. In a perfectly fitted single output regression
-model, ``max_error`` would be ``0`` on the training set and though this
-would be highly unlikely in the real world, this metric shows the
-extent of error that the model had when it was fitted.
+  R^2(y, \hat{y}) = 1 - \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{\sum_{i=1}^{n} (y_i - \bar{y})^2}
 
+where :math:`\bar{y} = \frac{1}{n} \sum_{i=1}^{n} y_i` and :math:`\sum_{i=1}^{n} (y_i - \hat{y}_i)^2 = \sum_{i=1}^{n} \epsilon_i^2`.
 
-If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample,
-and :math:`y_i` is the corresponding true value, then the max error is
-defined as
+Note that :func:`r2_score` calculates unadjusted R² without correcting for
+bias in sample variance of y.
 
-.. math::
+In the particular case where the true target is constant, the :math:`R^2` score is
+not finite: it is either ``NaN`` (perfect predictions) or ``-Inf`` (imperfect
+predictions). Such non-finite scores may prevent correct model optimization
+such as grid-search cross-validation to be performed correctly. For this reason
+the default behaviour of :func:`r2_score` is to replace them with 1.0 (perfect
+predictions) or 0.0 (imperfect predictions). If ``force_finite``
+is set to ``False``, this score falls back on the original :math:`R^2` definition.
 
-  \text{Max Error}(y, \hat{y}) = max(| y_i - \hat{y}_i |)
+Here is a small example of usage of the :func:`r2_score` function::
 
-Here is a small example of usage of the :func:`max_error` function::
+  >>> from sklearn.metrics import r2_score
+  >>> y_true = [3, -0.5, 2, 7]
+  >>> y_pred = [2.5, 0.0, 2, 8]
+  >>> r2_score(y_true, y_pred)
+  0.948...
+  >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
+  >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
+  >>> r2_score(y_true, y_pred, multioutput='variance_weighted')
+  0.938...
+  >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
+  >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
+  >>> r2_score(y_true, y_pred, multioutput='uniform_average')
+  0.936...
+  >>> r2_score(y_true, y_pred, multioutput='raw_values')
+  array([0.965..., 0.908...])
+  >>> r2_score(y_true, y_pred, multioutput=[0.3, 0.7])
+  0.925...
+  >>> y_true = [-2, -2, -2]
+  >>> y_pred = [-2, -2, -2]
+  >>> r2_score(y_true, y_pred)
+  1.0
+  >>> r2_score(y_true, y_pred, force_finite=False)
+  nan
+  >>> y_true = [-2, -2, -2]
+  >>> y_pred = [-2, -2, -2 + 1e-8]
+  >>> r2_score(y_true, y_pred)
+  0.0
+  >>> r2_score(y_true, y_pred, force_finite=False)
+  -inf
 
-  >>> from sklearn.metrics import max_error
-  >>> y_true = [3, 2, 7, 1]
-  >>> y_pred = [9, 2, 7, 1]
-  >>> max_error(y_true, y_pred)
-  6
+.. topic:: Example:
 
-The :func:`max_error` does not support multioutput.
+  * See :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py`
+    for an example of R² score usage to
+    evaluate Lasso and Elastic Net on sparse signals.
 
 .. _mean_absolute_error:
 
@@ -1962,6 +2186,42 @@ function::
   >>> mean_squared_log_error(y_true, y_pred)
   0.044...
 
+.. _mean_absolute_percentage_error:
+
+Mean absolute percentage error
+------------------------------
+The :func:`mean_absolute_percentage_error` (MAPE), also known as mean absolute
+percentage deviation (MAPD), is an evaluation metric for regression problems.
+The idea of this metric is to be sensitive to relative errors. It is for example
+not changed by a global scaling of the target variable.
+
+If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample
+and :math:`y_i` is the corresponding true value, then the mean absolute percentage
+error (MAPE) estimated over :math:`n_{\text{samples}}` is defined as
+
+.. math::
+
+  \text{MAPE}(y, \hat{y}) = \frac{1}{n_{\text{samples}}} \sum_{i=0}^{n_{\text{samples}}-1} \frac{{}\left| y_i - \hat{y}_i \right|}{\max(\epsilon, \left| y_i \right|)}
+
+where :math:`\epsilon` is an arbitrary small yet strictly positive number to
+avoid undefined results when y is zero.
+
+The :func:`mean_absolute_percentage_error` function supports multioutput.
+
+Here is a small example of usage of the :func:`mean_absolute_percentage_error`
+function::
+
+  >>> from sklearn.metrics import mean_absolute_percentage_error
+  >>> y_true = [1, 10, 1e6]
+  >>> y_pred = [0.9, 15, 1.2e6]
+  >>> mean_absolute_percentage_error(y_true, y_pred)
+  0.2666...
+
+In the above example, if we had used `mean_absolute_error`, it would have ignored
+the small magnitude values and only reflected the error in prediction of highest
+magnitude value. But that problem is resolved in the case of MAPE because it calculates
+relative percentage error with respect to actual output.
+
 .. _median_absolute_error:
 
 Median absolute error
@@ -1990,65 +2250,102 @@ function::
   >>> median_absolute_error(y_true, y_pred)
   0.5
 
-.. _r2_score:
 
-R² score, the coefficient of determination
--------------------------------------------
 
-The :func:`r2_score` function computes the `coefficient of
-determination <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_,
-usually denoted as R².
+.. _max_error:
 
-It represents the proportion of variance (of y) that has been explained by the
-independent variables in the model. It provides an indication of goodness of
-fit and therefore a measure of how well unseen samples are likely to be
-predicted by the model, through the proportion of explained variance.
+Max error
+-------------------
 
-As such variance is dataset dependent, R² may not be meaningfully comparable
-across different datasets. Best possible score is 1.0 and it can be negative
-(because the model can be arbitrarily worse). A constant model that always
-predicts the expected value of y, disregarding the input features, would get a
-R² score of 0.0.
+The :func:`max_error` function computes the maximum `residual error
+<https://en.wikipedia.org/wiki/Errors_and_residuals>`_ , a metric
+that captures the worst case error between the predicted value and
+the true value. In a perfectly fitted single output regression
+model, ``max_error`` would be ``0`` on the training set and though this
+would be highly unlikely in the real world, this metric shows the
+extent of error that the model had when it was fitted.
 
-If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample
-and :math:`y_i` is the corresponding true value for total :math:`n` samples,
-the estimated R² is defined as:
+
+If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample,
+and :math:`y_i` is the corresponding true value, then the max error is
+defined as
 
 .. math::
 
-  R^2(y, \hat{y}) = 1 - \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{\sum_{i=1}^{n} (y_i - \bar{y})^2}
+  \text{Max Error}(y, \hat{y}) = \max(| y_i - \hat{y}_i |)
 
-where :math:`\bar{y} = \frac{1}{n} \sum_{i=1}^{n} y_i` and :math:`\sum_{i=1}^{n} (y_i - \hat{y}_i)^2 = \sum_{i=1}^{n} \epsilon_i^2`.
+Here is a small example of usage of the :func:`max_error` function::
 
-Note that :func:`r2_score` calculates unadjusted R² without correcting for
-bias in sample variance of y.
+  >>> from sklearn.metrics import max_error
+  >>> y_true = [3, 2, 7, 1]
+  >>> y_pred = [9, 2, 7, 1]
+  >>> max_error(y_true, y_pred)
+  6
 
-Here is a small example of usage of the :func:`r2_score` function::
+The :func:`max_error` does not support multioutput.
 
-  >>> from sklearn.metrics import r2_score
-  >>> y_true = [3, -0.5, 2, 7]
-  >>> y_pred = [2.5, 0.0, 2, 8]
-  >>> r2_score(y_true, y_pred)
-  0.948...
-  >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
-  >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
-  >>> r2_score(y_true, y_pred, multioutput='variance_weighted')
-  0.938...
-  >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
-  >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
-  >>> r2_score(y_true, y_pred, multioutput='uniform_average')
-  0.936...
-  >>> r2_score(y_true, y_pred, multioutput='raw_values')
-  array([0.965..., 0.908...])
-  >>> r2_score(y_true, y_pred, multioutput=[0.3, 0.7])
-  0.925...
+.. _explained_variance_score:
 
+Explained variance score
+-------------------------
 
-.. topic:: Example:
+The :func:`explained_variance_score` computes the `explained variance
+regression score <https://en.wikipedia.org/wiki/Explained_variation>`_.
 
-  * See :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py`
-    for an example of R² score usage to
-    evaluate Lasso and Elastic Net on sparse signals.
+If :math:`\hat{y}` is the estimated target output, :math:`y` the corresponding
+(correct) target output, and :math:`Var` is `Variance
+<https://en.wikipedia.org/wiki/Variance>`_, the square of the standard deviation,
+then the explained variance is estimated as follows:
+
+.. math::
+
+  explained\_{}variance(y, \hat{y}) = 1 - \frac{Var\{ y - \hat{y}\}}{Var\{y\}}
+
+The best possible score is 1.0, lower values are worse.
+
+.. topic:: Link to :ref:`r2_score`
+
+    The difference between the explained variance score and the :ref:`r2_score`
+    is that the explained variance score does not account for
+    systematic offset in the prediction. For this reason, the
+    :ref:`r2_score` should be preferred in general.
+
+In the particular case where the true target is constant, the Explained
+Variance score is not finite: it is either ``NaN`` (perfect predictions) or
+``-Inf`` (imperfect predictions). Such non-finite scores may prevent correct
+model optimization such as grid-search cross-validation to be performed
+correctly. For this reason the default behaviour of
+:func:`explained_variance_score` is to replace them with 1.0 (perfect
+predictions) or 0.0 (imperfect predictions). You can set the ``force_finite``
+parameter to ``False`` to prevent this fix from happening and fallback on the
+original Explained Variance score.
+
+Here is a small example of usage of the :func:`explained_variance_score`
+function::
+
+    >>> from sklearn.metrics import explained_variance_score
+    >>> y_true = [3, -0.5, 2, 7]
+    >>> y_pred = [2.5, 0.0, 2, 8]
+    >>> explained_variance_score(y_true, y_pred)
+    0.957...
+    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
+    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
+    >>> explained_variance_score(y_true, y_pred, multioutput='raw_values')
+    array([0.967..., 1.        ])
+    >>> explained_variance_score(y_true, y_pred, multioutput=[0.3, 0.7])
+    0.990...
+    >>> y_true = [-2, -2, -2]
+    >>> y_pred = [-2, -2, -2]
+    >>> explained_variance_score(y_true, y_pred)
+    1.0
+    >>> explained_variance_score(y_true, y_pred, force_finite=False)
+    nan
+    >>> y_true = [-2, -2, -2]
+    >>> y_pred = [-2, -2, -2 + 1e-8]
+    >>> explained_variance_score(y_true, y_pred)
+    0.0
+    >>> explained_variance_score(y_true, y_pred, force_finite=False)
+    -inf
 
 
 .. _mean_tweedie_deviance:
@@ -2078,10 +2375,10 @@ is defined as
   \sum_{i=0}^{n_\text{samples} - 1}
   \begin{cases}
   (y_i-\hat{y}_i)^2, & \text{for }p=0\text{ (Normal)}\\
-  2(y_i \log(y/\hat{y}_i) + \hat{y}_i - y_i),  & \text{for}p=1\text{ (Poisson)}\\
-  2(\log(\hat{y}_i/y_i) + y_i/\hat{y}_i - 1),  & \text{for}p=2\text{ (Gamma)}\\
+  2(y_i \log(y_i/\hat{y}_i) + \hat{y}_i - y_i),  & \text{for }p=1\text{ (Poisson)}\\
+  2(\log(\hat{y}_i/y_i) + y_i/\hat{y}_i - 1),  & \text{for }p=2\text{ (Gamma)}\\
   2\left(\frac{\max(y_i,0)^{2-p}}{(1-p)(2-p)}-
-  \frac{y\,\hat{y}^{1-p}_i}{1-p}+\frac{\hat{y}^{2-p}_i}{2-p}\right),
+  \frac{y_i\,\hat{y}_i^{1-p}}{1-p}+\frac{\hat{y}_i^{2-p}}{2-p}\right),
   & \text{otherwise}
   \end{cases}
 
@@ -2093,8 +2390,8 @@ distribution (``power=0``), quadraticall
 ``power`` the less weight is given to extreme deviations between true
 and predicted targets.
 
-For instance, let's compare the two predictions 1.0 and 100 that are both
-50% of their corresponding true value.
+For instance, let's compare the two predictions 1.5 and 150 that are both
+50% larger than their corresponding true value.
 
 The mean squared error (``power=0``) is very sensitive to the
 prediction difference of the second point,::
@@ -2122,6 +2419,158 @@ the difference in errors decreases. Fina
 we would get identical errors. The deviance when ``power=2`` is thus only
 sensitive to relative errors.
 
+.. _pinball_loss:
+
+Pinball loss
+------------
+
+The :func:`mean_pinball_loss` function is used to evaluate the predictive
+performance of quantile regression models. The `pinball loss
+<https://en.wikipedia.org/wiki/Quantile_regression#Computation>`_ is equivalent
+to :func:`mean_absolute_error` when the quantile parameter ``alpha`` is set to
+0.5.
+
+.. math::
+
+  \text{pinball}(y, \hat{y}) = \frac{1}{n_{\text{samples}}} \sum_{i=0}^{n_{\text{samples}}-1}  \alpha \max(y_i - \hat{y}_i, 0) + (1 - \alpha) \max(\hat{y}_i - y_i, 0)
+
+Here is a small example of usage of the :func:`mean_pinball_loss` function::
+
+  >>> from sklearn.metrics import mean_pinball_loss
+  >>> y_true = [1, 2, 3]
+  >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)
+  0.03...
+  >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)
+  0.3...
+  >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)
+  0.3...
+  >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)
+  0.03...
+  >>> mean_pinball_loss(y_true, y_true, alpha=0.1)
+  0.0
+  >>> mean_pinball_loss(y_true, y_true, alpha=0.9)
+  0.0
+
+It is possible to build a scorer object with a specific choice of ``alpha``::
+
+  >>> from sklearn.metrics import make_scorer
+  >>> mean_pinball_loss_95p = make_scorer(mean_pinball_loss, alpha=0.95)
+
+Such a scorer can be used to evaluate the generalization performance of a
+quantile regressor via cross-validation:
+
+  >>> from sklearn.datasets import make_regression
+  >>> from sklearn.model_selection import cross_val_score
+  >>> from sklearn.ensemble import GradientBoostingRegressor
+  >>>
+  >>> X, y = make_regression(n_samples=100, random_state=0)
+  >>> estimator = GradientBoostingRegressor(
+  ...     loss="quantile",
+  ...     alpha=0.95,
+  ...     random_state=0,
+  ... )
+  >>> cross_val_score(estimator, X, y, cv=5, scoring=mean_pinball_loss_95p)
+  array([13.6..., 9.7..., 23.3..., 9.5..., 10.4...])
+
+It is also possible to build scorer objects for hyper-parameter tuning. The
+sign of the loss must be switched to ensure that greater means better as
+explained in the example linked below.
+
+.. topic:: Example:
+
+  * See :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`
+    for an example of using the pinball loss to evaluate and tune the
+    hyper-parameters of quantile regression models on data with non-symmetric
+    noise and outliers.
+
+.. _d2_score:
+
+D² score
+--------
+
+The D² score computes the fraction of deviance explained.
+It is a generalization of R², where the squared error is generalized and replaced
+by a deviance of choice :math:`\text{dev}(y, \hat{y})`
+(e.g., Tweedie, pinball or mean absolute error). D² is a form of a *skill score*.
+It is calculated as
+
+.. math::
+
+  D^2(y, \hat{y}) = 1 - \frac{\text{dev}(y, \hat{y})}{\text{dev}(y, y_{\text{null}})} \,.
+
+Where :math:`y_{\text{null}}` is the optimal prediction of an intercept-only model
+(e.g., the mean of `y_true` for the Tweedie case, the median for absolute
+error and the alpha-quantile for pinball loss).
+
+Like R², the best possible score is 1.0 and it can be negative (because the
+model can be arbitrarily worse). A constant model that always predicts
+:math:`y_{\text{null}}`, disregarding the input features, would get a D² score
+of 0.0.
+
+D² Tweedie score
+^^^^^^^^^^^^^^^^
+
+The :func:`d2_tweedie_score` function implements the special case of D²
+where :math:`\text{dev}(y, \hat{y})` is the Tweedie deviance, see :ref:`mean_tweedie_deviance`.
+It is also known as D² Tweedie and is related to McFadden's likelihood ratio index.
+
+The argument ``power`` defines the Tweedie power as for
+:func:`mean_tweedie_deviance`. Note that for `power=0`,
+:func:`d2_tweedie_score` equals :func:`r2_score` (for single targets).
+
+A scorer object with a specific choice of ``power`` can be built by::
+
+  >>> from sklearn.metrics import d2_tweedie_score, make_scorer
+  >>> d2_tweedie_score_15 = make_scorer(d2_tweedie_score, power=1.5)
+
+D² pinball score
+^^^^^^^^^^^^^^^^^^^^^
+
+The :func:`d2_pinball_score` function implements the special case
+of D² with the pinball loss, see :ref:`pinball_loss`, i.e.:
+
+.. math::
+
+  \text{dev}(y, \hat{y}) = \text{pinball}(y, \hat{y}).
+
+The argument ``alpha`` defines the slope of the pinball loss as for
+:func:`mean_pinball_loss` (:ref:`pinball_loss`). It determines the
+quantile level ``alpha`` for which the pinball loss and also D²
+are optimal. Note that for `alpha=0.5` (the default) :func:`d2_pinball_score`
+equals :func:`d2_absolute_error_score`.
+
+A scorer object with a specific choice of ``alpha`` can be built by::
+
+  >>> from sklearn.metrics import d2_pinball_score, make_scorer
+  >>> d2_pinball_score_08 = make_scorer(d2_pinball_score, alpha=0.8)
+
+D² absolute error score
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The :func:`d2_absolute_error_score` function implements the special case of
+the :ref:`mean_absolute_error`:
+
+.. math::
+
+  \text{dev}(y, \hat{y}) = \text{MAE}(y, \hat{y}).
+
+Here are some usage examples of the :func:`d2_absolute_error_score` function::
+
+  >>> from sklearn.metrics import d2_absolute_error_score
+  >>> y_true = [3, -0.5, 2, 7]
+  >>> y_pred = [2.5, 0.0, 2, 8]
+  >>> d2_absolute_error_score(y_true, y_pred)
+  0.764...
+  >>> y_true = [1, 2, 3]
+  >>> y_pred = [1, 2, 3]
+  >>> d2_absolute_error_score(y_true, y_pred)
+  1.0
+  >>> y_true = [1, 2, 3]
+  >>> y_pred = [2, 2, 2]
+  >>> d2_absolute_error_score(y_true, y_pred)
+  0.0
+
+
 .. _clustering_metrics:
 
 Clustering metrics
diff -pruN 0.23.2-5/doc/modules/model_persistence.rst 1.1.1-1/doc/modules/model_persistence.rst
--- 0.23.2-5/doc/modules/model_persistence.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/model_persistence.rst	1970-01-01 00:00:00.000000000 +0000
@@ -1,92 +0,0 @@
-.. _model_persistence:
-
-=================
-Model persistence
-=================
-
-After training a scikit-learn model, it is desirable to have a way to persist
-the model for future use without having to retrain. The following section gives
-you an example of how to persist a model with pickle. We'll also review a few
-security and maintainability issues when working with pickle serialization.
-
-An alternative to pickling is to export the model to another format using one
-of the model export tools listed under :ref:`related_projects`. Unlike
-pickling, once exported you cannot recover the full Scikit-learn estimator
-object, but you can deploy the model for prediction, usually by using tools
-supporting open model interchange formats such as `ONNX <https://onnx.ai/>`_ or
-`PMML <http://dmg.org/pmml/v4-4/GeneralStructure.html>`_.
-
-Persistence example
--------------------
-
-It is possible to save a model in scikit-learn by using Python's built-in
-persistence model, namely `pickle <https://docs.python.org/3/library/pickle.html>`_::
-
-  >>> from sklearn import svm
-  >>> from sklearn import datasets
-  >>> clf = svm.SVC()
-  >>> X, y= datasets.load_iris(return_X_y=True)
-  >>> clf.fit(X, y)
-  SVC()
-
-  >>> import pickle
-  >>> s = pickle.dumps(clf)
-  >>> clf2 = pickle.loads(s)
-  >>> clf2.predict(X[0:1])
-  array([0])
-  >>> y[0]
-  0
-
-In the specific case of scikit-learn, it may be better to use joblib's
-replacement of pickle (``dump`` & ``load``), which is more efficient on
-objects that carry large numpy arrays internally as is often the case for
-fitted scikit-learn estimators, but can only pickle to the disk and not to a
-string::
-
-  >>> from joblib import dump, load
-  >>> dump(clf, 'filename.joblib') # doctest: +SKIP
-
-Later you can load back the pickled model (possibly in another Python process)
-with::
-
-  >>> clf = load('filename.joblib') # doctest:+SKIP
-
-.. note::
-
-   ``dump`` and ``load`` functions also accept file-like object
-   instead of filenames. More information on data persistence with Joblib is
-   available `here <https://joblib.readthedocs.io/en/latest/persistence.html>`_.
-
-.. _persistence_limitations:
-
-Security & maintainability limitations
---------------------------------------
-
-pickle (and joblib by extension), has some issues regarding maintainability
-and security. Because of this,
-
-* Never unpickle untrusted data as it could lead to malicious code being 
-  executed upon loading.
-* While models saved using one version of scikit-learn might load in 
-  other versions, this is entirely unsupported and inadvisable. It should 
-  also be kept in mind that operations performed on such data could give
-  different and unexpected results.
-
-In order to rebuild a similar model with future versions of scikit-learn,
-additional metadata should be saved along the pickled model:
-
-* The training data, e.g. a reference to an immutable snapshot
-* The python source code used to generate the model
-* The versions of scikit-learn and its dependencies
-* The cross validation score obtained on the training data
-
-This should make it possible to check that the cross-validation score is in the
-same range as before.
-
-Since a model internal representation may be different on two different
-architectures, dumping a model on one architecture and loading it on
-another architecture is not supported.
-
-If you want to know more about these issues and explore other possible
-serialization methods, please refer to this
-`talk by Alex Gaynor <https://pyvideo.org/video/2566/pickles-are-for-delis-not-software>`_.
diff -pruN 0.23.2-5/doc/modules/multiclass.rst 1.1.1-1/doc/modules/multiclass.rst
--- 0.23.2-5/doc/modules/multiclass.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/multiclass.rst	2022-05-19 12:16:26.452782900 +0000
@@ -1,293 +1,200 @@
 
 .. _multiclass:
 
-====================================
-Multiclass and multilabel algorithms
-====================================
+=====================================
+Multiclass and multioutput algorithms
+=====================================
+
+This section of the user guide covers functionality related to multi-learning
+problems, including :term:`multiclass`, :term:`multilabel`, and
+:term:`multioutput` classification and regression.
+
+The modules in this section implement :term:`meta-estimators`, which require a
+base estimator to be provided in their constructor. Meta-estimators extend the
+functionality of the base estimator to support multi-learning problems, which
+is accomplished by transforming the multi-learning problem into a set of
+simpler problems, then fitting one estimator per problem.
+
+This section covers two modules: :mod:`sklearn.multiclass` and
+:mod:`sklearn.multioutput`. The chart below demonstrates the problem types
+that each module is responsible for, and the corresponding meta-estimators
+that each module provides.
+
+.. image:: ../images/multi_org_chart.png
+   :align: center
+
+The table below provides a quick reference on the differences between problem
+types. More detailed explanations can be found in subsequent sections of this
+guide.
+
++------------------------------+-----------------------+-------------------------+--------------------------------------------------+
+|                              | Number of targets     | Target cardinality      | Valid                                            |
+|                              |                       |                         | :func:`~sklearn.utils.multiclass.type_of_target` |
++==============================+=======================+=========================+==================================================+
+| Multiclass                   |  1                    | >2                      | 'multiclass'                                     |
+| classification               |                       |                         |                                                  |
++------------------------------+-----------------------+-------------------------+--------------------------------------------------+
+| Multilabel                   | >1                    |  2 (0 or 1)             | 'multilabel-indicator'                           |
+| classification               |                       |                         |                                                  |
++------------------------------+-----------------------+-------------------------+--------------------------------------------------+
+| Multiclass-multioutput       | >1                    | >2                      | 'multiclass-multioutput'                         |
+| classification               |                       |                         |                                                  |
++------------------------------+-----------------------+-------------------------+--------------------------------------------------+
+| Multioutput                  | >1                    | Continuous              | 'continuous-multioutput'                         |
+| regression                   |                       |                         |                                                  |
++------------------------------+-----------------------+-------------------------+--------------------------------------------------+
+
+Below is a summary of scikit-learn estimators that have multi-learning support
+built-in, grouped by strategy. You don't need the meta-estimators provided by
+this section if you're using one of these estimators. However, meta-estimators
+can provide additional strategies beyond what is built-in:
 
-.. currentmodule:: sklearn.multiclass
-
-.. warning::
-    All classifiers in scikit-learn do multiclass classification
-    out-of-the-box. You don't need to use the :mod:`sklearn.multiclass` module
-    unless you want to experiment with different multiclass strategies.
-
-The :mod:`sklearn.multiclass` module implements *meta-estimators* to solve
-``multiclass`` and ``multilabel`` classification problems
-by decomposing such problems into binary classification problems. ``multioutput``
-regression is also supported.
-
-- **Multiclass classification**: classification task with more than two classes.
-  Each sample can only be labelled as one class.
-
-  For example, classification using features extracted from a set of images of
-  fruit, where each image may either be of an orange, an apple, or a pear.
-  Each image is one sample and is labelled as one of the 3 possible classes.
-  Multiclass classification makes the assumption that each sample is assigned
-  to one and only one label - one sample cannot, for example, be both a pear
-  and an apple.
-
-  Valid :term:`multiclass` representations for
-  :func:`~utils.multiclass.type_of_target` (`y`) are:
-
-    - 1d or column vector containing more than two discrete values. An
-      example of a vector ``y`` for 3 samples:
-
-        >>> import numpy as np
-        >>> y = np.array(['apple', 'pear', 'apple'])
-        >>> print(y)
-        ['apple' 'pear' 'apple']
-
-    - sparse :term:`binary` matrix of shape ``(n_samples, n_classes)`` with a
-      single element per row, where each column represents one class. An
-      example of a sparse :term:`binary` matrix ``y`` for 3 samples, where
-      the columns, in order, are orange, apple and pear:
-
-        >>> from scipy import sparse
-        >>> row_ind = np.array([0, 1, 2])
-        >>> col_ind = np.array([1, 2, 1])
-        >>> y_sparse = sparse.csr_matrix((np.ones(3), (row_ind, col_ind)))
-        >>> print(y_sparse)
-          (0, 1)	1.0
-          (1, 2)	1.0
-          (2, 1)	1.0
-
-
-- **Multilabel classification**: classification task labelling each sample with
-  ``x`` labels from ``n_classes`` possible classes, where ``x`` can be 0 to
-  ``n_classes`` inclusive. This can be thought of as predicting properties of a
-  sample that are not mutually exclusive. Formally, a binary output is assigned
-  to each class, for every sample. Positive classes are indicated with 1 and
-  negative classes with 0 or -1. It is thus comparable to running ``n_classes``
-  binary classification tasks, for example with
-  :class:`sklearn.multioutput.MultiOutputClassifier`. This approach treats
-  each label independently whereas multilabel classifiers *may* treat the
-  multiple classes simultaneously, accounting for correlated behavior among
-  them.
-
-  For example, prediction of the topics relevant to a text document or video.
-  The document or video may be about one of 'religion', 'politics', 'finance'
-  or 'education', several of the topic classes or all of the topic classes.
-
-  A valid representation of :term:`multilabel` `y` is an either dense or sparse
-  :term:`binary` matrix of shape ``(n_samples, n_classes)``. Each column
-  represents a class. The ``1``'s in each row denote the positive classes a
-  sample has been labelled with. An example of a dense matrix ``y`` for 3
-  samples:
-
-    >>> y = np.array([[1, 0, 0, 1], [0, 0, 1, 1], [0, 0, 0, 0]])
-    >>> print(y)
-    [[1 0 0 1]
-     [0 0 1 1]
-     [0 0 0 0]]
-
-  An example of the same ``y`` in sparse matrix form:
-
-    >>> y_sparse = sparse.csr_matrix(y)
-    >>> print(y_sparse)
-      (0, 0)	1
-      (0, 3)	1
-      (1, 2)	1
-      (1, 3)	1
-
-
-- **Multioutput regression**: predicts multiple numerical properties for each
-  sample. Each property is a numerical variable and the number of properties
-  to be predicted for each sample is greater than or equal to 2. Some estimators
-  that support multioutput regression are faster than just running ``n_output``
-  estimators.
-
-  For example, prediction of both wind speed and wind direction, in degrees,
-  using data obtained at a certain location. Each sample would be data
-  obtained at one location and both wind speed and direction would be
-  output for each sample.
-
-  A valid representation of :term:`multioutput` `y` is a dense matrix of shape
-  ``(n_samples, n_classes)`` of floats. A column wise concatenation of
-  :term:`continuous` variables. An example of ``y`` for 3 samples:
-
-    >>> y = np.array([[31.4, 94], [40.5, 109], [25.0, 30]])
-    >>> print(y)
-    [[ 31.4  94. ]
-     [ 40.5 109. ]
-     [ 25.   30. ]]
-
-
-- **Multioutput-multiclass classification**
-  (also known as **multitask classification**):
-  classification task which labels each sample with a set of **non-binary**
-  properties. Both the number of properties and the number of
-  classes per property is greater than 2. A single estimator thus
-  handles several joint classification tasks. This is both a generalization of
-  the multi\ *label* classification task, which only considers binary
-  attributes, as well as a generalization of the multi\ *class* classification
-  task, where only one property is considered.
-
-  For example, classification of the properties "type of fruit" and "colour"
-  for a set of images of fruit. The property "type of fruit" has the possible
-  classes: "apple", "pear" and "orange". The property "colour" has the
-  possible classes: "green", "red", "yellow" and "orange". Each sample is an
-  image of a fruit, a label is output for both properties and each label is
-  one of the possible classes of the corresponding property.
-
-  A valid representation of :term:`multioutput` `y` is a dense matrix of shape
-  ``(n_samples, n_classes)`` of class labels. A column wise concatenation of 1d
-  :term:`multiclass` variables. An example of ``y`` for 3 samples:
-
-    >>> y = np.array([['apple', 'green'], ['orange', 'orange'], ['pear', 'green']])
-    >>> print(y)
-    [['apple' 'green']
-     ['orange' 'orange']
-     ['pear' 'green']]
-
-  Note that all classifiers handling multioutput-multiclass (also known as
-  multitask classification) tasks, support the multilabel classification task
-  as a special case. Multitask classification is similar to the multioutput
-  classification task with different model formulations. For more information,
-  see the relevant estimator documentation.
-
-
-All scikit-learn classifiers are capable of multiclass classification,
-but the meta-estimators offered by :mod:`sklearn.multiclass`
-permit changing the way they handle more than two classes
-because this may have an effect on classifier performance
-(either in terms of generalization error or required computational resources).
-
-**Summary**
-
-+-----------------+-------------+-------------+------------------------------------------+
-|                 | Number of   | Target      | Valid                                    |
-|                 | targets     | cardinality | :func:`~utils.multiclass.type_of_target` |
-+=================+=============+=============+==========================================+
-| Multiclass      |  1          | >2          | - 'multiclass'                           |
-| classification  |             |             |                                          |
-+-----------------+-------------+-------------+------------------------------------------+
-| Multilabel      | >1          |  2 (0 or 1) | - 'multilabel-indicator'                 |
-| classification  |             |             |                                          |
-+-----------------+-------------+-------------+------------------------------------------+
-| Multioutput     | >1          | Continuous  | - 'continuous-multioutput'               |
-| regression      |             |             |                                          |
-+-----------------+-------------+-------------+------------------------------------------+
-| Multioutput-    | >1          | >2          | - 'multiclass-multioutput'               |
-| multiclass      |             |             |                                          |
-| classification  |             |             |                                          |
-+-----------------+-------------+-------------+------------------------------------------+
-
-Below is a summary of the classifiers supported by scikit-learn
-grouped by strategy; you don't need the meta-estimators in this class
-if you're using one of these, unless you want custom multiclass behavior:
+.. currentmodule:: sklearn
 
 - **Inherently multiclass:**
 
-  - :class:`sklearn.naive_bayes.BernoulliNB`
-  - :class:`sklearn.tree.DecisionTreeClassifier`
-  - :class:`sklearn.tree.ExtraTreeClassifier`
-  - :class:`sklearn.ensemble.ExtraTreesClassifier`
-  - :class:`sklearn.naive_bayes.GaussianNB`
-  - :class:`sklearn.neighbors.KNeighborsClassifier`
-  - :class:`sklearn.semi_supervised.LabelPropagation`
-  - :class:`sklearn.semi_supervised.LabelSpreading`
-  - :class:`sklearn.discriminant_analysis.LinearDiscriminantAnalysis`
-  - :class:`sklearn.svm.LinearSVC` (setting multi_class="crammer_singer")
-  - :class:`sklearn.linear_model.LogisticRegression` (setting multi_class="multinomial")
-  - :class:`sklearn.linear_model.LogisticRegressionCV` (setting multi_class="multinomial")
-  - :class:`sklearn.neural_network.MLPClassifier`
-  - :class:`sklearn.neighbors.NearestCentroid`
-  - :class:`sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis`
-  - :class:`sklearn.neighbors.RadiusNeighborsClassifier`
-  - :class:`sklearn.ensemble.RandomForestClassifier`
-  - :class:`sklearn.linear_model.RidgeClassifier`
-  - :class:`sklearn.linear_model.RidgeClassifierCV`
+  - :class:`naive_bayes.BernoulliNB`
+  - :class:`tree.DecisionTreeClassifier`
+  - :class:`tree.ExtraTreeClassifier`
+  - :class:`ensemble.ExtraTreesClassifier`
+  - :class:`naive_bayes.GaussianNB`
+  - :class:`neighbors.KNeighborsClassifier`
+  - :class:`semi_supervised.LabelPropagation`
+  - :class:`semi_supervised.LabelSpreading`
+  - :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  - :class:`svm.LinearSVC` (setting multi_class="crammer_singer")
+  - :class:`linear_model.LogisticRegression` (setting multi_class="multinomial")
+  - :class:`linear_model.LogisticRegressionCV` (setting multi_class="multinomial")
+  - :class:`neural_network.MLPClassifier`
+  - :class:`neighbors.NearestCentroid`
+  - :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`
+  - :class:`neighbors.RadiusNeighborsClassifier`
+  - :class:`ensemble.RandomForestClassifier`
+  - :class:`linear_model.RidgeClassifier`
+  - :class:`linear_model.RidgeClassifierCV`
 
 
 - **Multiclass as One-Vs-One:**
 
-  - :class:`sklearn.svm.NuSVC`
-  - :class:`sklearn.svm.SVC`.
-  - :class:`sklearn.gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_one")
+  - :class:`svm.NuSVC`
+  - :class:`svm.SVC`.
+  - :class:`gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_one")
 
 
 - **Multiclass as One-Vs-The-Rest:**
 
-  - :class:`sklearn.ensemble.GradientBoostingClassifier`
-  - :class:`sklearn.gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_rest")
-  - :class:`sklearn.svm.LinearSVC` (setting multi_class="ovr")
-  - :class:`sklearn.linear_model.LogisticRegression` (setting multi_class="ovr")
-  - :class:`sklearn.linear_model.LogisticRegressionCV` (setting multi_class="ovr")
-  - :class:`sklearn.linear_model.SGDClassifier`
-  - :class:`sklearn.linear_model.Perceptron`
-  - :class:`sklearn.linear_model.PassiveAggressiveClassifier`
+  - :class:`ensemble.GradientBoostingClassifier`
+  - :class:`gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_rest")
+  - :class:`svm.LinearSVC` (setting multi_class="ovr")
+  - :class:`linear_model.LogisticRegression` (setting multi_class="ovr")
+  - :class:`linear_model.LogisticRegressionCV` (setting multi_class="ovr")
+  - :class:`linear_model.SGDClassifier`
+  - :class:`linear_model.Perceptron`
+  - :class:`linear_model.PassiveAggressiveClassifier`
 
 
 - **Support multilabel:**
 
-  - :class:`sklearn.tree.DecisionTreeClassifier`
-  - :class:`sklearn.tree.ExtraTreeClassifier`
-  - :class:`sklearn.ensemble.ExtraTreesClassifier`
-  - :class:`sklearn.neighbors.KNeighborsClassifier`
-  - :class:`sklearn.neural_network.MLPClassifier`
-  - :class:`sklearn.neighbors.RadiusNeighborsClassifier`
-  - :class:`sklearn.ensemble.RandomForestClassifier`
-  - :class:`sklearn.linear_model.RidgeClassifierCV`
+  - :class:`tree.DecisionTreeClassifier`
+  - :class:`tree.ExtraTreeClassifier`
+  - :class:`ensemble.ExtraTreesClassifier`
+  - :class:`neighbors.KNeighborsClassifier`
+  - :class:`neural_network.MLPClassifier`
+  - :class:`neighbors.RadiusNeighborsClassifier`
+  - :class:`ensemble.RandomForestClassifier`
+  - :class:`linear_model.RidgeClassifier`
+  - :class:`linear_model.RidgeClassifierCV`
 
 
 - **Support multiclass-multioutput:**
 
-  - :class:`sklearn.tree.DecisionTreeClassifier`
-  - :class:`sklearn.tree.ExtraTreeClassifier`
-  - :class:`sklearn.ensemble.ExtraTreesClassifier`
-  - :class:`sklearn.neighbors.KNeighborsClassifier`
-  - :class:`sklearn.neighbors.RadiusNeighborsClassifier`
-  - :class:`sklearn.ensemble.RandomForestClassifier`
+  - :class:`tree.DecisionTreeClassifier`
+  - :class:`tree.ExtraTreeClassifier`
+  - :class:`ensemble.ExtraTreesClassifier`
+  - :class:`neighbors.KNeighborsClassifier`
+  - :class:`neighbors.RadiusNeighborsClassifier`
+  - :class:`ensemble.RandomForestClassifier`
 
+.. _multiclass_classification:
+
+Multiclass classification
+=========================
 
 .. warning::
+    All classifiers in scikit-learn do multiclass classification
+    out-of-the-box. You don't need to use the :mod:`sklearn.multiclass` module
+    unless you want to experiment with different multiclass strategies.
 
-    At present, no metric in :mod:`sklearn.metrics`
-    supports the multioutput-multiclass classification task.
+**Multiclass classification** is a classification task with more than two
+classes. Each sample can only be labeled as one class.
+
+For example, classification using features extracted from a set of images of
+fruit, where each image may either be of an orange, an apple, or a pear.
+Each image is one sample and is labeled as one of the 3 possible classes.
+Multiclass classification makes the assumption that each sample is assigned
+to one and only one label - one sample cannot, for example, be both a pear
+and an apple.
+
+While all scikit-learn classifiers are capable of multiclass classification,
+the meta-estimators offered by :mod:`sklearn.multiclass`
+permit changing the way they handle more than two classes
+because this may have an effect on classifier performance
+(either in terms of generalization error or required computational resources).
+
+Target format
+-------------
+
+Valid :term:`multiclass` representations for
+:func:`~sklearn.utils.multiclass.type_of_target` (`y`) are:
 
-Multilabel classification format
-================================
+  - 1d or column vector containing more than two discrete values. An
+    example of a vector ``y`` for 4 samples:
 
-In multilabel learning, the joint set of binary classification tasks is
-expressed with label binary indicator array: each sample is one row of a 2d
-array of shape (n_samples, n_classes) with binary values: the one, i.e. the non
-zero elements, corresponds to the subset of labels. An array such as
-``np.array([[1, 0, 0], [0, 1, 1], [0, 0, 0]])`` represents label 0 in the first
-sample, labels 1 and 2 in the second sample, and no labels in the third sample.
-
-Producing multilabel data as a list of sets of labels may be more intuitive.
-The :class:`MultiLabelBinarizer <sklearn.preprocessing.MultiLabelBinarizer>`
-transformer can be used to convert between a collection of collections of
-labels and the indicator format.
-
-  >>> from sklearn.preprocessing import MultiLabelBinarizer
-  >>> y = [[2, 3, 4], [2], [0, 1, 3], [0, 1, 2, 3, 4], [0, 1, 2]]
-  >>> MultiLabelBinarizer().fit_transform(y)
-  array([[0, 0, 1, 1, 1],
-         [0, 0, 1, 0, 0],
-         [1, 1, 0, 1, 0],
-         [1, 1, 1, 1, 1],
-         [1, 1, 1, 0, 0]])
+      >>> import numpy as np
+      >>> y = np.array(['apple', 'pear', 'apple', 'orange'])
+      >>> print(y)
+      ['apple' 'pear' 'apple' 'orange']
+
+  - Dense or sparse :term:`binary` matrix of shape ``(n_samples, n_classes)``
+    with a single sample per row, where each column represents one class. An
+    example of both a dense and sparse :term:`binary` matrix ``y`` for 4
+    samples, where the columns, in order, are apple, orange, and pear:
+
+      >>> import numpy as np
+      >>> from sklearn.preprocessing import LabelBinarizer
+      >>> y = np.array(['apple', 'pear', 'apple', 'orange'])
+      >>> y_dense = LabelBinarizer().fit_transform(y)
+      >>> print(y_dense)
+        [[1 0 0]
+         [0 0 1]
+         [1 0 0]
+         [0 1 0]]
+      >>> from scipy import sparse
+      >>> y_sparse = sparse.csr_matrix(y_dense)
+      >>> print(y_sparse)
+          (0, 0)	1
+          (1, 2)	1
+          (2, 0)	1
+          (3, 1)	1
+
+For more information about :class:`~sklearn.preprocessing.LabelBinarizer`,
+refer to :ref:`preprocessing_targets`.
 
 .. _ovr_classification:
 
-One-Vs-The-Rest
-===============
+OneVsRestClassifier
+-------------------
 
-This strategy, also known as **one-vs-all**, is implemented in
-:class:`OneVsRestClassifier`.  The strategy consists in fitting one classifier
-per class. For each classifier, the class is fitted against all the other
-classes. In addition to its computational efficiency (only `n_classes`
-classifiers are needed), one advantage of this approach is its
-interpretability. Since each class is represented by one and only one classifier,
-it is possible to gain knowledge about the class by inspecting its
+The **one-vs-rest** strategy, also known as **one-vs-all**, is implemented in
+:class:`~sklearn.multiclass.OneVsRestClassifier`.  The strategy consists in
+fitting one classifier per class. For each classifier, the class is fitted
+against all the other classes. In addition to its computational efficiency
+(only `n_classes` classifiers are needed), one advantage of this approach is
+its interpretability. Since each class is represented by one and only one
+classifier, it is possible to gain knowledge about the class by inspecting its
 corresponding classifier. This is the most commonly used strategy and is a fair
 default choice.
 
-Multiclass learning
--------------------
-
 Below is an example of multiclass learning using OvR::
 
   >>> from sklearn import datasets
@@ -303,12 +210,10 @@ Below is an example of multiclass learni
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
 
-Multilabel learning
--------------------
 
-:class:`OneVsRestClassifier` also supports multilabel classification.
-To use this feature, feed the classifier an indicator matrix, in which cell
-[i, j] indicates the presence of label j in sample i.
+:class:`~sklearn.multiclass.OneVsRestClassifier` also supports multilabel
+classification. To use this feature, feed the classifier an indicator matrix,
+in which cell [i, j] indicates the presence of label j in sample i.
 
 
 .. figure:: ../auto_examples/miscellaneous/images/sphx_glr_plot_multilabel_001.png
@@ -323,15 +228,15 @@ To use this feature, feed the classifier
 
 .. _ovo_classification:
 
-One-Vs-One
-==========
+OneVsOneClassifier
+------------------
 
-:class:`OneVsOneClassifier` constructs one classifier per pair of classes.
-At prediction time, the class which received the most votes is selected.
-In the event of a tie (among two classes with an equal number of votes), it
-selects the class with the highest aggregate classification confidence by
-summing over the pair-wise classification confidence levels computed by the
-underlying binary classifiers.
+:class:`~sklearn.multiclass.OneVsOneClassifier` constructs one classifier per
+pair of classes. At prediction time, the class which received the most votes
+is selected. In the event of a tie (among two classes with an equal number of
+votes), it selects the class with the highest aggregate classification
+confidence by summing over the pair-wise classification confidence levels
+computed by the underlying binary classifiers.
 
 Since it requires to fit ``n_classes * (n_classes - 1) / 2`` classifiers,
 this method is usually slower than one-vs-the-rest, due to its
@@ -342,9 +247,6 @@ a small subset of the data whereas, with
 dataset is used ``n_classes`` times. The decision function is the result
 of a monotonic transformation of the one-versus-one classification.
 
-Multiclass learning
--------------------
-
 Below is an example of multiclass learning using OvO::
 
   >>> from sklearn import datasets
@@ -368,33 +270,33 @@ Below is an example of multiclass learni
 
 .. _ecoc:
 
-Error-Correcting Output-Codes
-=============================
+OutputCodeClassifier
+--------------------
 
-Output-code based strategies are fairly different from one-vs-the-rest and
-one-vs-one. With these strategies, each class is represented in a Euclidean
-space, where each dimension can only be 0 or 1. Another way to put it is
-that each class is represented by a binary code (an array of 0 and 1). The
-matrix which keeps track of the location/code of each class is called the
-code book. The code size is the dimensionality of the aforementioned space.
-Intuitively, each class should be represented by a code as unique as
-possible and a good code book should be designed to optimize classification
-accuracy. In this implementation, we simply use a randomly-generated code
-book as advocated in [3]_ although more elaborate methods may be added in the
-future.
+Error-Correcting Output Code-based strategies are fairly different from
+one-vs-the-rest and one-vs-one. With these strategies, each class is
+represented in a Euclidean space, where each dimension can only be 0 or 1.
+Another way to put it is that each class is represented by a binary code (an
+array of 0 and 1). The matrix which keeps track of the location/code of each
+class is called the code book. The code size is the dimensionality of the
+aforementioned space. Intuitively, each class should be represented by a code
+as unique as possible and a good code book should be designed to optimize
+classification accuracy. In this implementation, we simply use a
+randomly-generated code book as advocated in [3]_ although more elaborate
+methods may be added in the future.
 
 At fitting time, one binary classifier per bit in the code book is fitted.
 At prediction time, the classifiers are used to project new points in the
 class space and the class closest to the points is chosen.
 
-In :class:`OutputCodeClassifier`, the ``code_size`` attribute allows the user to
-control the number of classifiers which will be used. It is a percentage of the
-total number of classes.
+In :class:`~sklearn.multiclass.OutputCodeClassifier`, the ``code_size``
+attribute allows the user to control the number of classifiers which will be
+used. It is a percentage of the total number of classes.
 
 A number between 0 and 1 will require fewer classifiers than
 one-vs-the-rest. In theory, ``log2(n_classes) / n_classes`` is sufficient to
 represent each class unambiguously. However, in practice, it may not lead to
-good accuracy since ``log2(n_classes)`` is much smaller than n_classes.
+good accuracy since ``log2(n_classes)`` is much smaller than `n_classes`.
 
 A number greater than 1 will require more classifiers than
 one-vs-the-rest. In this case, some classifiers will in theory correct for
@@ -403,10 +305,6 @@ In practice, however, this may not happe
 typically be correlated. The error-correcting output codes have a similar
 effect to bagging.
 
-
-Multiclass learning
--------------------
-
 Below is an example of multiclass learning using Output-Codes::
 
   >>> from sklearn import datasets
@@ -440,54 +338,144 @@ Below is an example of multiclass learni
       Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)
       2008.
 
-Multioutput regression
-======================
+.. _multilabel_classification:
 
-Multioutput regression support can be added to any regressor with
-:class:`MultiOutputRegressor`.  This strategy consists of fitting one
-regressor per target. Since each target is represented by exactly one
-regressor it is possible to gain knowledge about the target by
-inspecting its corresponding regressor. As
-:class:`MultiOutputRegressor` fits one regressor per target it can not
-take advantage of correlations between targets.
-
-Below is an example of multioutput regression:
+Multilabel classification
+=========================
 
-  >>> from sklearn.datasets import make_regression
-  >>> from sklearn.multioutput import MultiOutputRegressor
-  >>> from sklearn.ensemble import GradientBoostingRegressor
-  >>> X, y = make_regression(n_samples=10, n_targets=3, random_state=1)
-  >>> MultiOutputRegressor(GradientBoostingRegressor(random_state=0)).fit(X, y).predict(X)
-  array([[-154.75474165, -147.03498585,  -50.03812219],
-         [   7.12165031,    5.12914884,  -81.46081961],
-         [-187.8948621 , -100.44373091,   13.88978285],
-         [-141.62745778,   95.02891072, -191.48204257],
-         [  97.03260883,  165.34867495,  139.52003279],
-         [ 123.92529176,   21.25719016,   -7.84253   ],
-         [-122.25193977,  -85.16443186, -107.12274212],
-         [ -30.170388  ,  -94.80956739,   12.16979946],
-         [ 140.72667194,  176.50941682,  -17.50447799],
-         [ 149.37967282,  -81.15699552,   -5.72850319]])
-
-Multioutput classification
-==========================
-
-Multioutput classification support can be added to any classifier with
-:class:`MultiOutputClassifier`. This strategy consists of fitting one
-classifier per target.  This allows multiple target variable
+**Multilabel classification** (closely related to **multioutput**
+**classification**) is a classification task labeling each sample with ``m``
+labels from ``n_classes`` possible classes, where ``m`` can be 0 to
+``n_classes`` inclusive. This can be thought of as predicting properties of a
+sample that are not mutually exclusive. Formally, a binary output is assigned
+to each class, for every sample. Positive classes are indicated with 1 and
+negative classes with 0 or -1. It is thus comparable to running ``n_classes``
+binary classification tasks, for example with
+:class:`~sklearn.multioutput.MultiOutputClassifier`. This approach treats
+each label independently whereas multilabel classifiers *may* treat the
+multiple classes simultaneously, accounting for correlated behavior among
+them.
+
+For example, prediction of the topics relevant to a text document or video.
+The document or video may be about one of 'religion', 'politics', 'finance'
+or 'education', several of the topic classes or all of the topic classes.
+
+Target format
+-------------
+
+A valid representation of :term:`multilabel` `y` is an either dense or sparse
+:term:`binary` matrix of shape ``(n_samples, n_classes)``. Each column
+represents a class. The ``1``'s in each row denote the positive classes a
+sample has been labeled with. An example of a dense matrix ``y`` for 3
+samples:
+
+  >>> y = np.array([[1, 0, 0, 1], [0, 0, 1, 1], [0, 0, 0, 0]])
+  >>> print(y)
+  [[1 0 0 1]
+   [0 0 1 1]
+   [0 0 0 0]]
+
+Dense binary matrices can also be created using
+:class:`~sklearn.preprocessing.MultiLabelBinarizer`. For more information,
+refer to :ref:`preprocessing_targets`.
+
+An example of the same ``y`` in sparse matrix form:
+
+  >>> y_sparse = sparse.csr_matrix(y)
+  >>> print(y_sparse)
+    (0, 0)	1
+    (0, 3)	1
+    (1, 2)	1
+    (1, 3)	1
+
+.. _multioutputclassfier:
+
+MultiOutputClassifier
+---------------------
+
+Multilabel classification support can be added to any classifier with
+:class:`~sklearn.multioutput.MultiOutputClassifier`. This strategy consists of
+fitting one classifier per target.  This allows multiple target variable
 classifications. The purpose of this class is to extend estimators
 to be able to estimate a series of target functions (f1,f2,f3...,fn)
 that are trained on a single X predictor matrix to predict a series
 of responses (y1,y2,y3...,yn).
 
-Below is an example of multioutput classification:
+You can find a usage example for
+:class:`~sklearn.multioutput.MultiOutputClassifier`
+as part of the section on :ref:`multiclass_multioutput_classification`
+since it is a generalization of multilabel classification to
+multiclass outputs instead of binary outputs.
+
+.. _classifierchain:
+
+ClassifierChain
+---------------
+
+Classifier chains (see :class:`~sklearn.multioutput.ClassifierChain`) are a way
+of combining a number of binary classifiers into a single multi-label model
+that is capable of exploiting correlations among targets.
+
+For a multi-label classification problem with N classes, N binary
+classifiers are assigned an integer between 0 and N-1. These integers
+define the order of models in the chain. Each classifier is then fit on the
+available training data plus the true labels of the classes whose
+models were assigned a lower number.
+
+When predicting, the true labels will not be available. Instead the
+predictions of each model are passed on to the subsequent models in the
+chain to be used as features.
+
+Clearly the order of the chain is important. The first model in the chain
+has no information about the other labels while the last model in the chain
+has features indicating the presence of all of the other labels. In general
+one does not know the optimal ordering of the models in the chain so
+typically many randomly ordered chains are fit and their predictions are
+averaged together.
+
+.. topic:: References:
+
+    Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank,
+        "Classifier Chains for Multi-label Classification", 2009.
+
+.. _multiclass_multioutput_classification:
+
+Multiclass-multioutput classification
+=====================================
+
+**Multiclass-multioutput classification**
+(also known as **multitask classification**) is a
+classification task which labels each sample with a set of **non-binary**
+properties. Both the number of properties and the number of
+classes per property are greater than 2. A single estimator thus
+handles several joint classification tasks. This is both a generalization of
+the multi\ *label* classification task, which only considers binary
+attributes, as well as a generalization of the multi\ *class* classification
+task, where only one property is considered.
+
+For example, classification of the properties "type of fruit" and "colour"
+for a set of images of fruit. The property "type of fruit" has the possible
+classes: "apple", "pear" and "orange". The property "colour" has the
+possible classes: "green", "red", "yellow" and "orange". Each sample is an
+image of a fruit, a label is output for both properties and each label is
+one of the possible classes of the corresponding property.
+
+Note that all classifiers handling multiclass-multioutput (also known as
+multitask classification) tasks support the multilabel classification task
+as a special case. Multitask classification is similar to the multioutput
+classification task with different model formulations. For more information,
+see the relevant estimator documentation.
+
+Below is an example of multiclass-multioutput classification:
 
     >>> from sklearn.datasets import make_classification
     >>> from sklearn.multioutput import MultiOutputClassifier
     >>> from sklearn.ensemble import RandomForestClassifier
     >>> from sklearn.utils import shuffle
     >>> import numpy as np
-    >>> X, y1 = make_classification(n_samples=10, n_features=100, n_informative=30, n_classes=3, random_state=1)
+    >>> X, y1 = make_classification(n_samples=10, n_features=100,
+    ...                             n_informative=30, n_classes=3,
+    ...                             random_state=1)
     >>> y2 = shuffle(y1, random_state=1)
     >>> y3 = shuffle(y1, random_state=2)
     >>> Y = np.vstack((y1, y2, y3)).T
@@ -495,7 +483,7 @@ Below is an example of multioutput class
     >>> n_outputs = Y.shape[1] # 3
     >>> n_classes = 3
     >>> forest = RandomForestClassifier(random_state=1)
-    >>> multi_target_forest = MultiOutputClassifier(forest, n_jobs=-1)
+    >>> multi_target_forest = MultiOutputClassifier(forest, n_jobs=2)
     >>> multi_target_forest.fit(X, Y).predict(X)
     array([[2, 2, 0],
            [1, 2, 1],
@@ -508,43 +496,89 @@ Below is an example of multioutput class
            [0, 0, 2],
            [2, 0, 0]])
 
-.. _classifierchain:
+.. warning::
+    At present, no metric in :mod:`sklearn.metrics`
+    supports the multiclass-multioutput classification task.
 
-Classifier Chain
-================
+Target format
+-------------
 
-Classifier chains (see :class:`ClassifierChain`) are a way of combining a
-number of binary classifiers into a single multi-label model that is capable
-of exploiting correlations among targets.
+A valid representation of :term:`multioutput` `y` is a dense matrix of shape
+``(n_samples, n_classes)`` of class labels. A column wise concatenation of 1d
+:term:`multiclass` variables. An example of ``y`` for 3 samples:
+
+  >>> y = np.array([['apple', 'green'], ['orange', 'orange'], ['pear', 'green']])
+  >>> print(y)
+  [['apple' 'green']
+   ['orange' 'orange']
+   ['pear' 'green']]
 
-For a multi-label classification problem with N classes, N binary
-classifiers are assigned an integer between 0 and N-1. These integers
-define the order of models in the chain. Each classifier is then fit on the
-available training data plus the true labels of the classes whose
-models were assigned a lower number.
+.. _multioutput_regression:
 
-When predicting, the true labels will not be available. Instead the
-predictions of each model are passed on to the subsequent models in the
-chain to be used as features.
+Multioutput regression
+======================
 
-Clearly the order of the chain is important. The first model in the chain
-has no information about the other labels while the last model in the chain
-has features indicating the presence of all of the other labels. In general
-one does not know the optimal ordering of the models in the chain so
-typically many randomly ordered chains are fit and their predictions are
-averaged together.
+**Multioutput regression** predicts multiple numerical properties for each
+sample. Each property is a numerical variable and the number of properties
+to be predicted for each sample is greater than or equal to 2. Some estimators
+that support multioutput regression are faster than just running ``n_output``
+estimators.
+
+For example, prediction of both wind speed and wind direction, in degrees,
+using data obtained at a certain location. Each sample would be data
+obtained at one location and both wind speed and direction would be
+output for each sample.
+
+Target format
+-------------
+
+A valid representation of :term:`multioutput` `y` is a dense matrix of shape
+``(n_samples, n_output)`` of floats. A column wise concatenation of
+:term:`continuous` variables. An example of ``y`` for 3 samples:
+
+  >>> y = np.array([[31.4, 94], [40.5, 109], [25.0, 30]])
+  >>> print(y)
+  [[ 31.4  94. ]
+   [ 40.5 109. ]
+   [ 25.   30. ]]
 
-.. topic:: References:
+.. _multioutputregressor:
 
-    Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank,
-        "Classifier Chains for Multi-label Classification", 2009.
+MultiOutputRegressor
+--------------------
+
+Multioutput regression support can be added to any regressor with
+:class:`~sklearn.multioutput.MultiOutputRegressor`.  This strategy consists of
+fitting one regressor per target. Since each target is represented by exactly
+one regressor it is possible to gain knowledge about the target by
+inspecting its corresponding regressor. As
+:class:`~sklearn.multioutput.MultiOutputRegressor` fits one regressor per
+target it can not take advantage of correlations between targets.
+
+Below is an example of multioutput regression:
+
+  >>> from sklearn.datasets import make_regression
+  >>> from sklearn.multioutput import MultiOutputRegressor
+  >>> from sklearn.ensemble import GradientBoostingRegressor
+  >>> X, y = make_regression(n_samples=10, n_targets=3, random_state=1)
+  >>> MultiOutputRegressor(GradientBoostingRegressor(random_state=0)).fit(X, y).predict(X)
+  array([[-154.75474165, -147.03498585,  -50.03812219],
+         [   7.12165031,    5.12914884,  -81.46081961],
+         [-187.8948621 , -100.44373091,   13.88978285],
+         [-141.62745778,   95.02891072, -191.48204257],
+         [  97.03260883,  165.34867495,  139.52003279],
+         [ 123.92529176,   21.25719016,   -7.84253   ],
+         [-122.25193977,  -85.16443186, -107.12274212],
+         [ -30.170388  ,  -94.80956739,   12.16979946],
+         [ 140.72667194,  176.50941682,  -17.50447799],
+         [ 149.37967282,  -81.15699552,   -5.72850319]])
 
 .. _regressorchain:
 
-Regressor Chain
-================
+RegressorChain
+--------------
 
-Regressor chains (see :class:`RegressorChain`) is analogous to
-ClassifierChain as a way of combining a number of regressions
-into a single multi-target model that is capable of exploiting
-correlations among targets.
+Regressor chains (see :class:`~sklearn.multioutput.RegressorChain`) is
+analogous to :class:`~sklearn.multioutput.ClassifierChain` as a way of
+combining a number of regressions into a single multi-target model that is
+capable of exploiting correlations among targets.
diff -pruN 0.23.2-5/doc/modules/neighbors.rst 1.1.1-1/doc/modules/neighbors.rst
--- 0.23.2-5/doc/modules/neighbors.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/neighbors.rst	2022-05-19 12:16:26.452782900 +0000
@@ -110,9 +110,9 @@ The dataset is structured such that poin
 in parameter space, leading to an approximately block-diagonal matrix of
 K-nearest neighbors.  Such a sparse graph is useful in a variety of
 circumstances which make use of spatial relationships between points for
-unsupervised learning: in particular, see :class:`sklearn.manifold.Isomap`,
-:class:`sklearn.manifold.LocallyLinearEmbedding`, and
-:class:`sklearn.cluster.SpectralClustering`.
+unsupervised learning: in particular, see :class:`~sklearn.manifold.Isomap`,
+:class:`~sklearn.manifold.LocallyLinearEmbedding`, and
+:class:`~sklearn.cluster.SpectralClustering`.
 
 KDTree and BallTree Classes
 ---------------------------
@@ -136,8 +136,9 @@ have the same interface; we'll show an e
 Refer to the :class:`KDTree` and :class:`BallTree` class documentation
 for more information on the options available for nearest neighbors searches,
 including specification of query strategies, distance metrics, etc. For a list
-of available metrics, see the documentation of the :class:`DistanceMetric`
-class.
+of available metrics, see the documentation of the :class:`DistanceMetric` class
+and the metrics listed in `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`.
+Note that the "cosine" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`.
 
 .. _classification:
 
@@ -339,7 +340,7 @@ a *KD-tree* in high dimensions, though t
 dependent on the structure of the training data.
 In scikit-learn, ball-tree-based
 neighbors searches are specified using the keyword ``algorithm = 'ball_tree'``,
-and are computed using the class :class:`sklearn.neighbors.BallTree`.
+and are computed using the class :class:`BallTree`.
 Alternatively, the user can work with the :class:`BallTree` class directly.
 
 .. topic:: References:
@@ -415,14 +416,25 @@ depends on a number of factors:
   a significant fraction of the total cost.  If very few query points
   will be required, brute force is better than a tree-based method.
 
-Currently, ``algorithm = 'auto'`` selects ``'brute'`` if :math:`k >= N/2`,
-the input data is sparse, or ``effective_metric_`` isn't in
-the ``VALID_METRICS`` list for either ``'kd_tree'`` or ``'ball_tree'``.
-Otherwise, it selects the first out of ``'kd_tree'`` and ``'ball_tree'``
-that has ``effective_metric_`` in its ``VALID_METRICS`` list.
-This choice is based on the assumption that the number of query points is at
-least the same order as the number of training points, and that ``leaf_size``
-is close to its default value of ``30``.
+Currently, ``algorithm = 'auto'`` selects ``'brute'`` if any of the following
+conditions are verified:
+
+* input data is sparse
+* ``metric = 'precomputed'``
+* :math:`D > 15`
+* :math:`k >= N/2`
+* ``effective_metric_`` isn't in the ``VALID_METRICS`` list for either
+  ``'kd_tree'`` or ``'ball_tree'``
+
+Otherwise, it selects the first out of ``'kd_tree'`` and ``'ball_tree'`` that
+has ``effective_metric_`` in its ``VALID_METRICS`` list. This heuristic is
+based on the following assumptions:
+
+* the number of query points is at least the same order as the number of
+  training points
+* ``leaf_size`` is close to its default value of ``30``
+* when :math:`D > 15`, the intrinsic dimensionality of the data is generally
+  too high for tree-based methods
 
 Effect of ``leaf_size``
 -----------------------
@@ -453,6 +465,21 @@ leaf nodes.  The level of this switch ca
 
 ``leaf_size`` is not referenced for brute force queries.
 
+Valid Metrics for Nearest Neighbor Algorithms
+---------------------------------------------
+
+For a list of available metrics, see the documentation of the :class:`DistanceMetric`
+class and the metrics listed in `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`.
+Note that the "cosine" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`.
+
+A list of valid metrics for any of the above algorithms can be obtained by using their
+``valid_metrics`` attribute. For example, valid metrics for ``KDTree`` can be generated by:
+
+    >>> from sklearn.neighbors import KDTree
+    >>> print(sorted(KDTree.valid_metrics))
+    ['chebyshev', 'cityblock', 'euclidean', 'infinity', 'l1', 'l2', 'manhattan', 'minkowski', 'p']
+
+
 .. _nearest_centroid_classifier:
 
 Nearest Centroid Classifier
@@ -460,12 +487,12 @@ Nearest Centroid Classifier
 
 The :class:`NearestCentroid` classifier is a simple algorithm that represents
 each class by the centroid of its members. In effect, this makes it
-similar to the label updating phase of the :class:`sklearn.cluster.KMeans` algorithm.
+similar to the label updating phase of the :class:`~sklearn.cluster.KMeans` algorithm.
 It also has no parameters to choose, making it a good baseline classifier. It
 does, however, suffer on non-convex classes, as well as when classes have
 drastically different variances, as equal variance in all dimensions is
-assumed. See Linear Discriminant Analysis (:class:`sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)
-and Quadratic Discriminant Analysis (:class:`sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis`)
+assumed. See Linear Discriminant Analysis (:class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)
+and Quadratic Discriminant Analysis (:class:`~sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis`)
 for more complex methods that do not make this assumption. Usage of the default
 :class:`NearestCentroid` is simple:
 
@@ -537,13 +564,20 @@ First, the precomputed graph can be re-u
 varying a parameter of the estimator. This can be done manually by the user, or
 using the caching properties of the scikit-learn pipeline:
 
+    >>> import tempfile
     >>> from sklearn.manifold import Isomap
     >>> from sklearn.neighbors import KNeighborsTransformer
     >>> from sklearn.pipeline import make_pipeline
+    >>> from sklearn.datasets import make_regression
+    >>> cache_path = tempfile.gettempdir()  # we use a temporary folder here
+    >>> X, _ = make_regression(n_samples=50, n_features=25, random_state=0)
     >>> estimator = make_pipeline(
-    ...     KNeighborsTransformer(n_neighbors=5, mode='distance'),
-    ...     Isomap(neighbors_algorithm='precomputed'),
-    ...     memory='/path/to/cache')
+    ...     KNeighborsTransformer(mode='distance'),
+    ...     Isomap(n_components=3, metric='precomputed'),
+    ...     memory=cache_path)
+    >>> X_embedded = estimator.fit_transform(X)
+    >>> X_embedded.shape
+    (50, 3)
 
 Second, precomputing the graph can give finer control on the nearest neighbors
 estimation, for instance enabling multiprocessing though the parameter
@@ -695,8 +729,8 @@ are projected onto a linear subspace con
 minimize the NCA objective. The desired dimensionality can be set using the
 parameter ``n_components``. For instance, the following figure shows a
 comparison of dimensionality reduction with Principal Component Analysis
-(:class:`sklearn.decomposition.PCA`), Linear Discriminant Analysis
-(:class:`sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) and
+(:class:`~sklearn.decomposition.PCA`), Linear Discriminant Analysis
+(:class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) and
 Neighborhood Component Analysis (:class:`NeighborhoodComponentsAnalysis`) on
 the Digits dataset, a dataset with size :math:`n_{samples} = 1797` and
 :math:`n_{features} = 64`. The data set is split into a training and a test set
diff -pruN 0.23.2-5/doc/modules/neural_networks_supervised.rst 1.1.1-1/doc/modules/neural_networks_supervised.rst
--- 0.23.2-5/doc/modules/neural_networks_supervised.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/neural_networks_supervised.rst	2022-05-19 12:16:26.452782900 +0000
@@ -184,7 +184,7 @@ Algorithms
 
 MLP trains using `Stochastic Gradient Descent
 <https://en.wikipedia.org/wiki/Stochastic_gradient_descent>`_,
-`Adam <https://arxiv.org/abs/1412.6980>`_, or
+:arxiv:`Adam <1412.6980>`, or
 `L-BFGS <https://en.wikipedia.org/wiki/Limited-memory_BFGS>`__.
 Stochastic Gradient Descent (SGD) updates parameters using the gradient of the
 loss function with respect to a parameter that needs adaptation, i.e.
@@ -266,21 +266,22 @@ In regression, the output remains as :ma
 function is just the identity function.
 
 MLP uses different loss functions depending on the problem type. The loss
-function for classification is Cross-Entropy, which in binary case is given as,
+function for classification is Average Cross-Entropy, which in binary case is
+given as,
 
 .. math::
 
-    Loss(\hat{y},y,W) = -y \ln {\hat{y}} - (1-y) \ln{(1-\hat{y})} + \alpha ||W||_2^2
+    Loss(\hat{y},y,W) = -\dfrac{1}{n}\sum_{i=1}^{n}(y_i \ln {\hat{y_i}} + (1-y_i) \ln{(1-\hat{y_i})}) + \dfrac{\alpha}{2n} ||W||_2^2
 
 where :math:`\alpha ||W||_2^2` is an L2-regularization term (aka penalty)
 that penalizes complex models; and :math:`\alpha > 0` is a non-negative
 hyperparameter that controls the magnitude of the penalty.
 
-For regression, MLP uses the Square Error loss function; written as,
+For regression, MLP uses the Mean Square Error loss function; written as,
 
 .. math::
 
-    Loss(\hat{y},y,W) = \frac{1}{2}||\hat{y} - y ||_2^2 + \frac{\alpha}{2} ||W||_2^2
+    Loss(\hat{y},y,W) = \frac{1}{2n}\sum_{i=1}^{n}||\hat{y}_i - y_i ||_2^2 + \frac{\alpha}{2n} ||W||_2^2
 
 
 Starting from initial random weights, multi-layer perceptron (MLP) minimizes
@@ -368,6 +369,6 @@ or want to do additional monitoring, usi
       Y. LeCun, L. Bottou, G. Orr, K. Müller - In Neural Networks: Tricks
       of the Trade 1998.
 
-    *  `"Adam: A method for stochastic optimization."
-       <https://arxiv.org/pdf/1412.6980v8.pdf>`_
-       Kingma, Diederik, and Jimmy Ba. arXiv preprint arXiv:1412.6980 (2014).
+    *  :arxiv:`"Adam: A method for stochastic optimization."
+       <1412.6980>`
+       Kingma, Diederik, and Jimmy Ba (2014)
diff -pruN 0.23.2-5/doc/modules/outlier_detection.rst 1.1.1-1/doc/modules/outlier_detection.rst
--- 0.23.2-5/doc/modules/outlier_detection.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/outlier_detection.rst	2022-05-19 12:16:26.456783000 +0000
@@ -74,20 +74,22 @@ not available.
   When ``novelty`` is set to ``True`` be aware that you must only use
   ``predict``, ``decision_function`` and ``score_samples`` on new unseen data
   and not on the training samples as this would lead to wrong results.
+  I.e., the result of ``predict`` will not be the same as ``fit_predict``.
   The scores of abnormality of the training samples are always accessible
   through the ``negative_outlier_factor_`` attribute.
 
 The behavior of :class:`neighbors.LocalOutlierFactor` is summarized in the
 following table.
 
-===================== ================================ =====================
-Method                Outlier detection                Novelty detection
-===================== ================================ =====================
-``fit_predict``       OK                               Not available
-``predict``           Not available                    Use only on new data
-``decision_function`` Not available                    Use only on new data
-``score_samples``     Use ``negative_outlier_factor_`` Use only on new data
-===================== ================================ =====================
+============================ ================================ =====================
+Method                       Outlier detection                Novelty detection
+============================ ================================ =====================
+``fit_predict``              OK                               Not available
+``predict``                  Not available                    Use only on new data
+``decision_function``        Not available                    Use only on new data
+``score_samples``            Use ``negative_outlier_factor_`` Use only on new data
+``negative_outlier_factor_`` OK                               OK
+============================ ================================ =====================
 
 
 Overview of outlier detection methods
@@ -106,9 +108,18 @@ detection.
 :class:`ensemble.IsolationForest` and :class:`neighbors.LocalOutlierFactor`
 perform reasonably well on the data sets considered here.
 The :class:`svm.OneClassSVM` is known to be sensitive to outliers and thus
-does not perform very well for outlier detection. Finally,
-:class:`covariance.EllipticEnvelope` assumes the data is Gaussian and learns
-an ellipse. For more details on the different estimators refer to the example
+does not perform very well for outlier detection. That being said, outlier
+detection in high-dimension, or without any assumptions on the distribution
+of the inlying data is very challenging. :class:`svm.OneClassSVM` may still
+be used with outlier detection but requires fine-tuning of its hyperparameter
+`nu` to handle outliers and prevent overfitting.
+:class:`linear_model.SGDOneClassSVM` provides an implementation of a
+linear One-Class SVM with a linear complexity in the number of samples. This
+implementation is here used with a kernel approximation technique to obtain
+results similar to :class:`svm.OneClassSVM` which uses a Gaussian kernel
+by default. Finally, :class:`covariance.EllipticEnvelope` assumes the data is
+Gaussian and learns an ellipse. For more details on the different estimators
+refer to the example
 :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` and the
 sections hereunder.
 
@@ -120,6 +131,12 @@ sections hereunder.
     :class:`neighbors.LocalOutlierFactor` and
     :class:`covariance.EllipticEnvelope`.
 
+  * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_outlier_detection_bench.py`
+    for an example showing how to evaluate outlier detection estimators,
+    the :class:`neighbors.LocalOutlierFactor` and the
+    :class:`ensemble.IsolationForest`, using ROC curves from
+    :class:`metrics.RocCurveDisplay`.
+
 Novelty Detection
 =================
 
@@ -146,15 +163,15 @@ and implemented in the :ref:`svm` module
 kernel and a scalar parameter to define a frontier.  The RBF kernel is
 usually chosen although there exists no exact formula or algorithm to
 set its bandwidth parameter. This is the default in the scikit-learn
-implementation. The :math:`\nu` parameter, also known as the margin of
+implementation. The `nu` parameter, also known as the margin of
 the One-Class SVM, corresponds to the probability of finding a new,
 but regular, observation outside the frontier.
 
 .. topic:: References:
 
     * `Estimating the support of a high-dimensional distribution
-      <https://dl.acm.org/citation.cfm?id=1119749>`_ Schölkopf,
-      Bernhard, et al. Neural computation 13.7 (2001): 1443-1471.
+      <http://www.recognition.mccme.ru/pub/papers/SVM/sch99estimating.pdf>`_
+      Schölkopf, Bernhard, et al. Neural computation 13.7 (2001): 1443-1471.
 
 .. topic:: Examples:
 
@@ -169,6 +186,23 @@ but regular, observation outside the fro
    :scale: 75%
 
 
+Scaling up the One-Class SVM
+----------------------------
+
+An online linear version of the One-Class SVM is implemented in
+:class:`linear_model.SGDOneClassSVM`. This implementation scales linearly with
+the number of samples and can be used with a kernel approximation to
+approximate the solution of a kernelized :class:`svm.OneClassSVM` whose
+complexity is at best quadratic in the number of samples. See section
+:ref:`sgd_online_one_class_svm` for more details.
+
+.. topic:: Examples:
+
+  * See :ref:`sphx_glr_auto_examples_linear_model_plot_sgdocsvm_vs_ocsvm.py`
+    for an illustration of the approximation of a kernelized One-Class SVM
+    with the :class:`linear_model.SGDOneClassSVM` combined with kernel approximation.
+
+
 Outlier Detection
 =================
 
@@ -274,14 +308,15 @@ allows you to add more trees to an alrea
      for a comparison of :class:`ensemble.IsolationForest` with
      :class:`neighbors.LocalOutlierFactor`,
      :class:`svm.OneClassSVM` (tuned to perform like an outlier detection
-     method) and a covariance-based outlier detection with
-     :class:`covariance.EllipticEnvelope`.
+     method), :class:`linear_model.SGDOneClassSVM`, and a covariance-based
+     outlier detection with :class:`covariance.EllipticEnvelope`.
 
 .. topic:: References:
 
     * Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation forest."
       Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on.
 
+.. _local_outlier_factor:
 
 Local Outlier Factor
 --------------------
@@ -306,7 +341,7 @@ chosen 1) greater than the minimum numbe
 so that other objects can be local outliers relative to this cluster, and 2)
 smaller than the maximum number of close by objects that can potentially be
 local outliers.
-In practice, such informations are generally not available, and taking
+In practice, such information is generally not available, and taking
 n_neighbors=20 appears to work well in general.
 When the proportion of outliers is high (i.e. greater than 10 \%, as in the
 example below), n_neighbors should be greater (n_neighbors=35 in the example
@@ -324,13 +359,14 @@ method. The scores of abnormality of the
 through the ``negative_outlier_factor_`` attribute.
 Note that ``predict``, ``decision_function`` and ``score_samples`` can be used
 on new unseen data when LOF is applied for novelty detection, i.e. when the
-``novelty`` parameter is set to ``True``. See :ref:`novelty_with_lof`.
+``novelty`` parameter is set to ``True``, but the result of ``predict`` may
+differ from that of ``fit_predict``. See :ref:`novelty_with_lof`.
 
 
 This strategy is illustrated below.
 
 .. figure:: ../auto_examples/neighbors/images/sphx_glr_plot_lof_outlier_detection_001.png
-   :target: ../auto_examples/neighbors/sphx_glr_plot_lof_outlier_detection.html
+   :target: ../auto_examples/neighbors/plot_lof_outlier_detection.html
    :align: center
    :scale: 75%
 
@@ -362,20 +398,20 @@ set to ``True`` before fitting the estim
   lof = LocalOutlierFactor(novelty=True)
   lof.fit(X_train)
 
-Note that ``fit_predict`` is not available in this case.
+Note that ``fit_predict`` is not available in this case to avoid inconsistencies.
 
 .. warning:: **Novelty detection with Local Outlier Factor`**
 
   When ``novelty`` is set to ``True`` be aware that you must only use
   ``predict``, ``decision_function`` and ``score_samples`` on new unseen data
   and not on the training samples as this would lead to wrong results.
+  I.e., the result of ``predict`` will not be the same as ``fit_predict``.
   The scores of abnormality of the training samples are always accessible
   through the ``negative_outlier_factor_`` attribute.
 
 Novelty detection with Local Outlier Factor is illustrated below.
 
   .. figure:: ../auto_examples/neighbors/images/sphx_glr_plot_lof_novelty_detection_001.png
-     :target: ../auto_examples/neighbors/sphx_glr_plot_lof_novelty_detection.html
+     :target: ../auto_examples/neighbors/plot_lof_novelty_detection.html
      :align: center
      :scale: 75%
-
diff -pruN 0.23.2-5/doc/modules/partial_dependence.rst 1.1.1-1/doc/modules/partial_dependence.rst
--- 0.23.2-5/doc/modules/partial_dependence.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/partial_dependence.rst	2022-05-19 12:16:26.456783000 +0000
@@ -1,61 +1,75 @@
 
 .. _partial_dependence:
 
-========================
-Partial dependence plots
-========================
+===============================================================
+Partial Dependence and Individual Conditional Expectation plots
+===============================================================
 
 .. currentmodule:: sklearn.inspection
 
-Partial dependence plots (PDP) show the dependence between the target
-response [1]_ and a set of 'target' features, marginalizing over the values
-of all other features (the 'complement' features). Intuitively, we can
+Partial dependence plots (PDP) and individual conditional expectation (ICE)
+plots can be used to visualize and analyze interaction between the target
+response [1]_ and a set of input features of interest.
+
+Both PDPs [H2009]_ and ICEs [G2015]_ assume that the input features of interest
+are independent from the complement features, and this assumption is often
+violated in practice. Thus, in the case of correlated features, we will
+create absurd data points to compute the PDP/ICE [M2019]_.
+
+Partial dependence plots
+========================
+
+Partial dependence plots (PDP) show the dependence between the target response
+and a set of input features of interest, marginalizing over the values
+of all other input features (the 'complement' features). Intuitively, we can
 interpret the partial dependence as the expected target response as a
-function of the 'target' features.
+function of the input features of interest.
 
-Due to the limits of human perception the size of the target feature set
-must be small (usually, one or two) thus the target features are usually
-chosen among the most important features.
-
-The figure below shows four one-way and one two-way partial dependence plots
-for the California housing dataset, with a :class:`GradientBoostingRegressor
-<sklearn.ensemble.GradientBoostingRegressor>`:
+Due to the limits of human perception the size of the set of input features of
+interest must be small (usually, one or two) thus the input features of interest
+are usually chosen among the most important features.
 
-.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_partial_dependence_002.png
+The figure below shows two one-way and one two-way partial dependence plots for
+the California housing dataset, with a :class:`HistGradientBoostingRegressor
+<sklearn.ensemble.HistGradientBoostingRegressor>`:
+
+.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_partial_dependence_003.png
    :target: ../auto_examples/inspection/plot_partial_dependence.html
    :align: center
    :scale: 70
 
-One-way PDPs tell us about the interaction between the target response and
-the target feature (e.g. linear, non-linear). The upper left plot in the
-above figure shows the effect of the median income in a district on the
-median house price; we can clearly see a linear relationship among them. Note
-that PDPs assume that the target features are independent from the complement
-features, and this assumption is often violated in practice.
-
-PDPs with two target features show the interactions among the two features.
-For example, the two-variable PDP in the above figure shows the dependence
-of median house price on joint values of house age and average occupants per
-household. We can clearly see an interaction between the two features: for
-an average occupancy greater than two, the house price is nearly independent of
-the house age, whereas for values less than 2 there is a strong dependence
-on age.
+One-way PDPs tell us about the interaction between the target response and an
+input feature of interest (e.g. linear, non-linear). The left plot
+in the above figure shows the effect of the average occupancy on the median
+house price; we can clearly see a linear relationship among them when the
+average occupancy is inferior to 3 persons. Similarly, we could analyze the
+effect of the house age on the median house price (middle plot). Thus, these
+interpretations are marginal, considering a feature at a time.
+
+PDPs with two input features of interest show the interactions among the two
+features. For example, the two-variable PDP in the above figure shows the
+dependence of median house price on joint values of house age and average
+occupants per household. We can clearly see an interaction between the two
+features: for an average occupancy greater than two, the house price is nearly
+independent of the house age, whereas for values less than 2 there is a strong
+dependence on age.
 
 The :mod:`sklearn.inspection` module provides a convenience function
-:func:`plot_partial_dependence` to create one-way and two-way partial
+:func:`~PartialDependenceDisplay.from_estimator` to create one-way and two-way partial
 dependence plots. In the below example we show how to create a grid of
 partial dependence plots: two one-way PDPs for the features ``0`` and ``1``
 and a two-way PDP between the two features::
 
     >>> from sklearn.datasets import make_hastie_10_2
     >>> from sklearn.ensemble import GradientBoostingClassifier
-    >>> from sklearn.inspection import plot_partial_dependence
+    >>> from sklearn.inspection import PartialDependenceDisplay
 
     >>> X, y = make_hastie_10_2(random_state=0)
     >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
     ...     max_depth=1, random_state=0).fit(X, y)
     >>> features = [0, 1, (0, 1)]
-    >>> plot_partial_dependence(clf, X, features) #doctest: +SKIP
+    >>> PartialDependenceDisplay.from_estimator(clf, X, features)
+    <...>
 
 You can access the newly created figure and Axes objects using ``plt.gcf()``
 and ``plt.gca()``.
@@ -68,7 +82,8 @@ the PDPs should be created via the ``tar
     >>> mc_clf = GradientBoostingClassifier(n_estimators=10,
     ...     max_depth=1).fit(iris.data, iris.target)
     >>> features = [3, 2, (3, 2)]
-    >>> plot_partial_dependence(mc_clf, X, features, target=0) #doctest: +SKIP
+    >>> PartialDependenceDisplay.from_estimator(mc_clf, X, features, target=0)
+    <...>
 
 The same parameter ``target`` is used to specify the target in multi-output
 regression settings.
@@ -79,24 +94,93 @@ the plots, you can use the
 
     >>> from sklearn.inspection import partial_dependence
 
-    >>> pdp, axes = partial_dependence(clf, X, [0])
-    >>> pdp
+    >>> results = partial_dependence(clf, X, [0])
+    >>> results["average"]
     array([[ 2.466...,  2.466..., ...
-    >>> axes
+    >>> results["values"]
     [array([-1.624..., -1.592..., ...
 
 The values at which the partial dependence should be evaluated are directly
 generated from ``X``. For 2-way partial dependence, a 2D-grid of values is
 generated. The ``values`` field returned by
 :func:`sklearn.inspection.partial_dependence` gives the actual values
-used in the grid for each target feature. They also correspond to the axis
-of the plots.
+used in the grid for each input feature of interest. They also correspond to
+the axis of the plots.
+
+.. _individual_conditional:
+
+Individual conditional expectation (ICE) plot
+=============================================
+
+Similar to a PDP, an individual conditional expectation (ICE) plot
+shows the dependence between the target function and an input feature of
+interest. However, unlike a PDP, which shows the average effect of the input
+feature, an ICE plot visualizes the dependence of the prediction on a
+feature for each sample separately with one line per sample.
+Due to the limits of human perception, only one input feature of interest is
+supported for ICE plots.
+
+The figures below show four ICE plots for the California housing dataset,
+with a :class:`HistGradientBoostingRegressor
+<sklearn.ensemble.HistGradientBoostingRegressor>`. The second figure plots
+the corresponding PD line overlaid on ICE lines.
+
+.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_partial_dependence_002.png
+   :target: ../auto_examples/inspection/plot_partial_dependence.html
+   :align: center
+   :scale: 70
+
+While the PDPs are good at showing the average effect of the target features,
+they can obscure a heterogeneous relationship created by interactions.
+When interactions are present the ICE plot will provide many more insights.
+For example, we could observe a linear relationship between the median income
+and the house price in the PD line. However, the ICE lines show that there
+are some exceptions, where the house price remains constant in some ranges of
+the median income.
+
+The :mod:`sklearn.inspection` module's :meth:`PartialDependenceDisplay.from_estimator`
+convenience function can be used to create ICE plots by setting
+``kind='individual'``. In the example below, we show how to create a grid of
+ICE plots:
+
+    >>> from sklearn.datasets import make_hastie_10_2
+    >>> from sklearn.ensemble import GradientBoostingClassifier
+    >>> from sklearn.inspection import PartialDependenceDisplay
+
+    >>> X, y = make_hastie_10_2(random_state=0)
+    >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
+    ...     max_depth=1, random_state=0).fit(X, y)
+    >>> features = [0, 1]
+    >>> PartialDependenceDisplay.from_estimator(clf, X, features,
+    ...     kind='individual')
+    <...>
+
+In ICE plots it might not be easy to see the average effect of the input
+feature of interest. Hence, it is recommended to use ICE plots alongside
+PDPs. They can be plotted together with
+``kind='both'``.
+
+    >>> PartialDependenceDisplay.from_estimator(clf, X, features,
+    ...     kind='both')
+    <...>
+
+If there are too many lines in an ICE plot, it can be difficult to see
+differences between individual samples and interpret the model. Centering the
+ICE at the first value on the x-axis produces centered Individual Conditional
+Expectation (cICE) plots [G2015]_. This puts emphasis on the divergence of
+individual conditional expectations from the mean line, thus making it easier
+to explore heterogeneous relationships. cICE plots can be plotted by setting
+`centered=True`:
+
+    >>> PartialDependenceDisplay.from_estimator(clf, X, features,
+    ...     kind='both', centered=True)
+    <...>
 
 Mathematical Definition
-^^^^^^^^^^^^^^^^^^^^^^^
+=======================
 
-Let :math:`X_S` be the set of target features (i.e. the `features` parameter)
-and let :math:`X_C` be its complement.
+Let :math:`X_S` be the set of input features of interest (i.e. the `features`
+parameter) and let :math:`X_C` be its complement.
 
 The partial dependence of the response :math:`f` at a point :math:`x_S` is
 defined as:
@@ -112,17 +196,19 @@ values are defined by :math:`x_S` for th
 :math:`x_C` for the features in :math:`X_C`. Note that :math:`x_S` and
 :math:`x_C` may be tuples.
 
-Computing this integral for various values of :math:`x_S` produces a plot as
-above.
+Computing this integral for various values of :math:`x_S` produces a PDP plot
+as above. An ICE line is defined as a single :math:`f(x_{S}, x_{C}^{(i)})`
+evaluated at :math:`x_{S}`.
 
 Computation methods
-^^^^^^^^^^^^^^^^^^^
+===================
 
 There are two main methods to approximate the integral above, namely the
 'brute' and 'recursion' methods. The `method` parameter controls which method
 to use.
 
-The 'brute' method is a generic method that works with any estimator. It
+The 'brute' method is a generic method that works with any estimator. Note that
+computing ICE plots is only supported with the 'brute' method. It
 approximates the above integral by computing an average over the data `X`:
 
 .. math::
@@ -133,22 +219,27 @@ where :math:`x_C^{(i)}` is the value of
 :math:`X_C`. For each value of :math:`x_S`, this method requires a full pass
 over the dataset `X` which is computationally intensive.
 
+Each of the :math:`f(x_{S}, x_{C}^{(i)})` corresponds to one ICE line evaluated
+at :math:`x_{S}`. Computing this for multiple values of :math:`x_{S}`, one
+obtains a full ICE line. As one can see, the average of the ICE lines
+corresponds to the partial dependence line.
+
 The 'recursion' method is faster than the 'brute' method, but it is only
-supported by some tree-based estimators. It is computed as follows. For a
-given point :math:`x_S`, a weighted tree traversal is performed: if a split
-node involves a 'target' feature, the corresponding left or right branch is
-followed; otherwise both branches are followed, each branch being weighted
-by the fraction of training samples that entered that branch. Finally, the
-partial dependence is given by a weighted average of all the visited leaves
-values.
+supported for PDP plots by some tree-based estimators. It is computed as
+follows. For a given point :math:`x_S`, a weighted tree traversal is performed:
+if a split node involves an input feature of interest, the corresponding left
+or right branch is followed; otherwise both branches are followed, each branch
+being weighted by the fraction of training samples that entered that branch.
+Finally, the partial dependence is given by a weighted average of all the
+visited leaves values.
 
 With the 'brute' method, the parameter `X` is used both for generating the
 grid of values :math:`x_S` and the complement feature values :math:`x_C`.
 However with the 'recursion' method, `X` is only used for the grid values:
 implicitly, the :math:`x_C` values are those of the training data.
 
-By default, the 'recursion' method is used on tree-based estimators that
-support it, and 'brute' is used for the rest.
+By default, the 'recursion' method is used for plotting PDPs on tree-based
+estimators that support it, and 'brute' is used for the rest.
 
 .. _pdp_method_differences:
 
@@ -163,21 +254,32 @@ support it, and 'brute' is used for the
     samples differently. Remember, however, that the primary assumption for
     interpreting PDPs is that the features should be independent.
 
+
+.. topic:: Examples:
+
+ * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py`
+
 .. rubric:: Footnotes
 
 .. [1] For classification, the target response may be the probability of a
    class (the positive class for binary classification), or the decision
    function.
 
-.. topic:: Examples:
-
- * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py`
-
 .. topic:: References
 
-    T. Hastie, R. Tibshirani and J. Friedman, `The Elements of
-    Statistical Learning <https://web.stanford.edu/~hastie/ElemStatLearn//>`_,
-    Second Edition, Section 10.13.2, Springer, 2009.
-
-    C. Molnar, `Interpretable Machine Learning
-    <https://christophm.github.io/interpretable-ml-book/>`_, Section 5.1, 2019.
+    .. [H2009] T. Hastie, R. Tibshirani and J. Friedman,
+               `The Elements of Statistical Learning
+               <https://web.stanford.edu/~hastie/ElemStatLearn//>`_,
+               Second Edition, Section 10.13.2, Springer, 2009.
+
+    .. [M2019] C. Molnar,
+               `Interpretable Machine Learning
+               <https://christophm.github.io/interpretable-ml-book/>`_,
+               Section 5.1, 2019.
+
+    .. [G2015] :arxiv:`A. Goldstein, A. Kapelner, J. Bleich, and E. Pitkin,
+               "Peeking Inside the Black Box: Visualizing Statistical
+               Learning With Plots of Individual Conditional Expectation"
+               Journal of Computational and Graphical Statistics,
+               24(1): 44-65, Springer, 2015.
+               <1309.6392>`
diff -pruN 0.23.2-5/doc/modules/permutation_importance.rst 1.1.1-1/doc/modules/permutation_importance.rst
--- 0.23.2-5/doc/modules/permutation_importance.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/permutation_importance.rst	2022-05-19 12:16:26.456783000 +0000
@@ -16,6 +16,16 @@ indicative of how much the model depends
 benefits from being model agnostic and can be calculated many times with
 different permutations of the feature.
 
+.. warning::
+
+  Features that are deemed of **low importance for a bad model** (low
+  cross-validation score) could be **very important for a good model**.
+  Therefore it is always important to evaluate the predictive power of a model
+  using a held-out set (or better with cross-validation) prior to computing
+  importances. Permutation importance does not reflect the intrinsic
+  predictive value of a feature by itself but **how important this feature is
+  for a particular model**.
+
 The :func:`permutation_importance` function calculates the feature importance
 of :term:`estimators` for a given dataset. The ``n_repeats`` parameter sets the
 number of times a feature is randomly shuffled and returns a sample of feature
@@ -64,15 +74,49 @@ highlight which features contribute the
 inspected model. Features that are important on the training set but not on the
 held-out set might cause the model to overfit.
 
-.. warning::
-
-  Features that are deemed of **low importance for a bad model** (low
-  cross-validation score) could be **very important for a good model**.
-  Therefore it is always important to evaluate the predictive power of a model
-  using a held-out set (or better with cross-validation) prior to computing
-  importances. Permutation importance does not reflect to the intrinsic
-  predictive value of a feature by itself but **how important this feature is
-  for a particular model**.
+The permutation feature importance is the decrease in a model score when a single
+feature value is randomly shuffled. The score function to be used for the
+computation of importances can be specified with the `scoring` argument,
+which also accepts multiple scorers. Using multiple scorers is more computationally
+efficient than sequentially calling :func:`permutation_importance` several times
+with a different scorer, as it reuses model predictions.
+
+An example of using multiple scorers is shown below, employing a list of metrics,
+but more input formats are possible, as documented in :ref:`multimetric_scoring`.
+
+  >>> scoring = ['r2', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error']
+  >>> r_multi = permutation_importance(
+  ...     model, X_val, y_val, n_repeats=30, random_state=0, scoring=scoring)
+  ...
+  >>> for metric in r_multi:
+  ...     print(f"{metric}")
+  ...     r = r_multi[metric]
+  ...     for i in r.importances_mean.argsort()[::-1]:
+  ...         if r.importances_mean[i] - 2 * r.importances_std[i] > 0:
+  ...             print(f"    {diabetes.feature_names[i]:<8}"
+  ...                   f"{r.importances_mean[i]:.3f}"
+  ...                   f" +/- {r.importances_std[i]:.3f}")
+  ...
+  r2
+      s5      0.204 +/- 0.050
+      bmi     0.176 +/- 0.048
+      bp      0.088 +/- 0.033
+      sex     0.056 +/- 0.023
+  neg_mean_absolute_percentage_error
+      s5      0.081 +/- 0.020
+      bmi     0.064 +/- 0.015
+      bp      0.029 +/- 0.010
+  neg_mean_squared_error
+      s5      1013.866 +/- 246.445
+      bmi     872.726 +/- 240.298
+      bp      438.663 +/- 163.022
+      sex     277.376 +/- 115.123
+
+The ranking of the features is approximately the same for different metrics even
+if the scales of the importance values are very different. However, this is not
+guaranteed and different metrics might lead to significantly different feature
+importances, in particular for models trained for imbalanced classification problems,
+for which the choice of the classification metric can be critical.
 
 Outline of the permutation importance algorithm
 -----------------------------------------------
@@ -101,7 +145,7 @@ Relation to impurity-based importance in
 Tree-based models provide an alternative measure of :ref:`feature importances
 based on the mean decrease in impurity <random_forest_feature_importance>`
 (MDI). Impurity is quantified by the splitting criterion of the decision trees
-(Gini, Entropy or Mean Squared Error). However, this method can give high
+(Gini, Log Loss or Mean Squared Error). However, this method can give high
 importance to features that may not be predictive on unseen data when the model
 is overfitting. Permutation-based feature importance, on the other hand, avoids
 this issue, since it can be computed on unseen data.
@@ -113,7 +157,7 @@ with a small number of possible categori
 
 Permutation-based feature importances do not exhibit such a bias. Additionally,
 the permutation feature importance may be computed performance metric on the
-model predictions predictions and can be used to analyze any model class (not
+model predictions and can be used to analyze any model class (not
 just tree-based models).
 
 The following example highlights the limitations of impurity-based feature
@@ -140,5 +184,5 @@ example:
 
 .. topic:: References:
 
-   .. [1] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32,
-       2001. https://doi.org/10.1023/A:1010933404324
+   .. [1] L. Breiman, :doi:`"Random Forests" <10.1023/A:1010933404324>`,
+      Machine Learning, 45(1), 5-32, 2001.
diff -pruN 0.23.2-5/doc/modules/preprocessing.rst 1.1.1-1/doc/modules/preprocessing.rst
--- 0.23.2-5/doc/modules/preprocessing.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/preprocessing.rst	2022-05-19 12:16:26.456783000 +0000
@@ -41,16 +41,27 @@ than others, it might dominate the objec
 estimator unable to learn from other features correctly as expected.
 
 
-The function :func:`scale` provides a quick and easy way to perform this
-operation on a single array-like dataset::
+The :mod:`~sklearn.preprocessing` module provides the
+:class:`StandardScaler` utility class, which is a quick and
+easy way to perform the following operation on an array-like
+dataset::
 
   >>> from sklearn import preprocessing
   >>> import numpy as np
   >>> X_train = np.array([[ 1., -1.,  2.],
   ...                     [ 2.,  0.,  0.],
   ...                     [ 0.,  1., -1.]])
-  >>> X_scaled = preprocessing.scale(X_train)
+  >>> scaler = preprocessing.StandardScaler().fit(X_train)
+  >>> scaler
+  StandardScaler()
+
+  >>> scaler.mean_
+  array([1. ..., 0. ..., 0.33...])
+
+  >>> scaler.scale_
+  array([0.81..., 0.81..., 1.24...])
 
+  >>> X_scaled = scaler.transform(X_train)
   >>> X_scaled
   array([[ 0.  ..., -1.22...,  1.33...],
          [ 1.22...,  0.  ..., -0.26...],
@@ -71,35 +82,26 @@ Scaled data has zero mean and unit varia
 
 ..    >>> print_options = np.set_printoptions(print_options)
 
-The ``preprocessing`` module further provides a utility class
-:class:`StandardScaler` that implements the ``Transformer`` API to compute
-the mean and standard deviation on a training set so as to be
-able to later reapply the same transformation on the testing set.
-This class is hence suitable for use in the early steps of a
-:class:`sklearn.pipeline.Pipeline`::
-
-  >>> scaler = preprocessing.StandardScaler().fit(X_train)
-  >>> scaler
-  StandardScaler()
-
-  >>> scaler.mean_
-  array([1. ..., 0. ..., 0.33...])
-
-  >>> scaler.scale_
-  array([0.81..., 0.81..., 1.24...])
-
-  >>> scaler.transform(X_train)
-  array([[ 0.  ..., -1.22...,  1.33...],
-         [ 1.22...,  0.  ..., -0.26...],
-         [-1.22...,  1.22..., -1.06...]])
+This class implements the ``Transformer`` API to compute the mean and
+standard deviation on a training set so as to be able to later re-apply the
+same transformation on the testing set. This class is hence suitable for
+use in the early steps of a :class:`~sklearn.pipeline.Pipeline`::
 
+  >>> from sklearn.datasets import make_classification
+  >>> from sklearn.linear_model import LogisticRegression
+  >>> from sklearn.model_selection import train_test_split
+  >>> from sklearn.pipeline import make_pipeline
+  >>> from sklearn.preprocessing import StandardScaler
 
-The scaler instance can then be used on new data to transform it the
-same way it did on the training set::
+  >>> X, y = make_classification(random_state=42)
+  >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
+  >>> pipe = make_pipeline(StandardScaler(), LogisticRegression())
+  >>> pipe.fit(X_train, y_train)  # apply scaling on training data
+  Pipeline(steps=[('standardscaler', StandardScaler()),
+                  ('logisticregression', LogisticRegression())])
 
-  >>> X_test = [[-1., 1., 0.]]
-  >>> scaler.transform(X_test)
-  array([[-2.44...,  1.22..., -0.26...]])
+  >>> pipe.score(X_test, y_test)  # apply scaling on testing data, without leaking training data.
+  0.96
 
 It is possible to disable either centering or scaling by either
 passing ``with_mean=False`` or ``with_std=False`` to the constructor
@@ -181,20 +183,15 @@ Here is how to use the toy data from the
   array([2.,  1.,  2.])
 
 
-As with :func:`scale`, the module further provides convenience functions
-:func:`minmax_scale` and :func:`maxabs_scale` if you don't want to create
-an object.
-
-
 Scaling sparse data
 -------------------
 Centering sparse data would destroy the sparseness structure in the data, and
 thus rarely is a sensible thing to do. However, it can make sense to scale
 sparse inputs, especially if features are on different scales.
 
-:class:`MaxAbsScaler`  and :func:`maxabs_scale` were specifically designed
-for scaling sparse data, and are the recommended way to go about this.
-However, :func:`scale` and :class:`StandardScaler` can accept ``scipy.sparse``
+:class:`MaxAbsScaler` was specifically designed for scaling
+sparse data, and is the recommended way to go about this.
+However, :class:`StandardScaler` can accept ``scipy.sparse``
 matrices  as input, as long as ``with_mean=False`` is explicitly passed
 to the constructor. Otherwise a ``ValueError`` will be raised as
 silently centering would break the sparsity and would often crash the
@@ -218,9 +215,8 @@ Scaling data with outliers
 
 If your data contains many outliers, scaling using the mean and variance
 of the data is likely to not work very well. In these cases, you can use
-:func:`robust_scale` and :class:`RobustScaler` as drop-in replacements
-instead. They use more robust estimates for the center and range of your
-data.
+:class:`RobustScaler` as a drop-in replacement instead. It uses
+more robust estimates for the center and range of your data.
 
 
 .. topic:: References:
@@ -235,25 +231,74 @@ data.
   independently, since a downstream model can further make some assumption
   on the linear independence of the features.
 
-  To address this issue you can use :class:`sklearn.decomposition.PCA` with
+  To address this issue you can use :class:`~sklearn.decomposition.PCA` with
   ``whiten=True`` to further remove the linear correlation across features.
 
-.. topic:: Scaling a 1D array
-
-   All above functions (i.e. :func:`scale`, :func:`minmax_scale`,
-   :func:`maxabs_scale`, and :func:`robust_scale`) accept 1D array which can be
-   useful in some specific case.
-
 .. _kernel_centering:
 
 Centering kernel matrices
 -------------------------
 
 If you have a kernel matrix of a kernel :math:`K` that computes a dot product
-in a feature space defined by function :math:`\phi`,
-a :class:`KernelCenterer` can transform the kernel matrix
-so that it contains inner products in the feature space
-defined by :math:`\phi` followed by removal of the mean in that space.
+in a feature space (possibly implicitly) defined by a function
+:math:`\phi(\cdot)`, a :class:`KernelCenterer` can transform the kernel matrix
+so that it contains inner products in the feature space defined by :math:`\phi`
+followed by the removal of the mean in that space. In other words,
+:class:`KernelCenterer` computes the centered Gram matrix associated to a
+positive semidefinite kernel :math:`K`.
+
+**Mathematical formulation**
+
+We can have a look at the mathematical formulation now that we have the
+intuition. Let :math:`K` be a kernel matrix of shape `(n_samples, n_samples)`
+computed from :math:`X`, a data matrix of shape `(n_samples, n_features)`,
+during the `fit` step. :math:`K` is defined by
+
+.. math::
+  K(X, X) = \phi(X) . \phi(X)^{T}
+
+:math:`\phi(X)` is a function mapping of :math:`X` to a Hilbert space. A
+centered kernel :math:`\tilde{K}` is defined as:
+
+.. math::
+  \tilde{K}(X, X) = \tilde{\phi}(X) . \tilde{\phi}(X)^{T}
+
+where :math:`\tilde{\phi}(X)` results from centering :math:`\phi(X)` in the
+Hilbert space.
+
+Thus, one could compute :math:`\tilde{K}` by mapping :math:`X` using the
+function :math:`\phi(\cdot)` and center the data in this new space. However,
+kernels are often used because they allow some algebraic calculations that
+avoid explicitly computing this mapping using :math:`\phi(\cdot)`. Indeed, one
+can implicitly center as shown in Appendix B in [Scholkopf1998]_:
+
+.. math::
+  \tilde{K} = K - 1_{\text{n}_{samples}} K - K 1_{\text{n}_{samples}} + 1_{\text{n}_{samples}} K 1_{\text{n}_{samples}}
+
+:math:`1_{\text{n}_{samples}}` is a matrix of `(n_samples, n_samples)` where
+all entries are equal to :math:`\frac{1}{\text{n}_{samples}}`. In the
+`transform` step, the kernel becomes :math:`K_{test}(X, Y)` defined as:
+
+.. math::
+  K_{test}(X, Y) = \phi(Y) . \phi(X)^{T}
+
+:math:`Y` is the test dataset of shape `(n_samples_test, n_features)` and thus
+:math:`K_{test}` is of shape `(n_samples_test, n_samples)`. In this case,
+centering :math:`K_{test}` is done as:
+
+.. math::
+  \tilde{K}_{test}(X, Y) = K_{test} - 1'_{\text{n}_{samples}} K - K_{test} 1_{\text{n}_{samples}} + 1'_{\text{n}_{samples}} K 1_{\text{n}_{samples}}
+
+:math:`1'_{\text{n}_{samples}}` is a matrix of shape
+`(n_samples_test, n_samples)` where all entries are equal to
+:math:`\frac{1}{\text{n}_{samples}}`.
+
+.. topic:: References
+
+  .. [Scholkopf1998] B. Schölkopf, A. Smola, and K.R. Müller,
+    `"Nonlinear component analysis as a kernel eigenvalue problem."
+    <https://www.mlpack.org/papers/kpca.pdf>`_
+    Neural computation 10.5 (1998): 1299-1319.
 
 .. _preprocessing_transformer:
 
@@ -284,8 +329,8 @@ data from any distribution to as close t
 Mapping to a Uniform distribution
 ---------------------------------
 
-:class:`QuantileTransformer` and :func:`quantile_transform` provide a
-non-parametric transformation to map the data to a uniform distribution
+:class:`QuantileTransformer` provides a non-parametric
+transformation to map the data to a uniform distribution
 with values between 0 and 1::
 
   >>> from sklearn.datasets import load_iris
@@ -414,8 +459,8 @@ This assumption is the base of the `Vect
 classification and clustering contexts.
 
 The function :func:`normalize` provides a quick and easy way to perform this
-operation on a single array-like dataset, either using the ``l1`` or ``l2``
-norms::
+operation on a single array-like dataset, either using the ``l1``, ``l2``, or
+``max`` norms::
 
   >>> X = [[ 1., -1.,  2.],
   ...      [ 2.,  0.,  0.],
@@ -433,7 +478,7 @@ The ``preprocessing`` module further pro
 the class is stateless as this operation treats samples independently).
 
 This class is hence suitable for use in the early steps of a
-:class:`sklearn.pipeline.Pipeline`::
+:class:`~sklearn.pipeline.Pipeline`::
 
   >>> normalizer = preprocessing.Normalizer().fit(X)  # fit does nothing
   >>> normalizer
@@ -492,6 +537,43 @@ scikit-learn estimators, as these expect
 the categories as being ordered, which is often not desired (i.e. the set of
 browsers was ordered arbitrarily).
 
+By default, :class:`OrdinalEncoder` will also passthrough missing values that
+are indicated by `np.nan`.
+
+    >>> enc = preprocessing.OrdinalEncoder()
+    >>> X = [['male'], ['female'], [np.nan], ['female']]
+    >>> enc.fit_transform(X)
+    array([[ 1.],
+           [ 0.],
+           [nan],
+           [ 0.]])
+
+:class:`OrdinalEncoder` provides a parameter `encoded_missing_value` to encode
+the missing values without the need to create a pipeline and using
+:class:`~sklearn.impute.SimpleImputer`.
+
+    >>> enc = preprocessing.OrdinalEncoder(encoded_missing_value=-1)
+    >>> X = [['male'], ['female'], [np.nan], ['female']]
+    >>> enc.fit_transform(X)
+    array([[ 1.],
+           [ 0.],
+           [-1.],
+           [ 0.]])
+
+The above processing is equivalent to the following pipeline::
+
+    >>> from sklearn.pipeline import Pipeline
+    >>> from sklearn.impute import SimpleImputer
+    >>> enc = Pipeline(steps=[
+    ...     ("encoder", preprocessing.OrdinalEncoder()),
+    ...     ("imputer", SimpleImputer(strategy="constant", fill_value=-1)),
+    ... ])
+    >>> enc.fit_transform(X)
+    array([[ 1.],
+           [ 0.],
+           [-1.],
+           [ 0.]])
+
 Another possibility to convert categorical features to features that can be used
 with scikit-learn estimators is to use a one-of-K, also known as one-hot or
 dummy encoding.
@@ -538,17 +620,19 @@ dataset::
     array([[1., 0., 0., 1., 0., 0., 1., 0., 0., 0.]])
 
 If there is a possibility that the training data might have missing categorical
-features, it can often be better to specify ``handle_unknown='ignore'`` instead
-of setting the ``categories`` manually as above. When
-``handle_unknown='ignore'`` is specified and unknown categories are encountered
-during transform, no error will be raised but the resulting one-hot encoded
-columns for this feature will be all zeros
-(``handle_unknown='ignore'`` is only supported for one-hot encoding)::
+features, it can often be better to specify
+`handle_unknown='infrequent_if_exist'` instead of setting the `categories`
+manually as above. When `handle_unknown='infrequent_if_exist'` is specified
+and unknown categories are encountered during transform, no error will be
+raised but the resulting one-hot encoded columns for this feature will be all
+zeros or considered as an infrequent category if enabled.
+(`handle_unknown='infrequent_if_exist'` is only supported for one-hot
+encoding)::
 
-    >>> enc = preprocessing.OneHotEncoder(handle_unknown='ignore')
+    >>> enc = preprocessing.OneHotEncoder(handle_unknown='infrequent_if_exist')
     >>> X = [['male', 'from US', 'uses Safari'], ['female', 'from Europe', 'uses Firefox']]
     >>> enc.fit(X)
-    OneHotEncoder(handle_unknown='ignore')
+    OneHotEncoder(handle_unknown='infrequent_if_exist')
     >>> enc.transform([['female', 'from Asia', 'uses Chrome']]).toarray()
     array([[1., 0., 0., 0., 0., 0.]])
 
@@ -559,15 +643,14 @@ parameter allows the user to specify a c
 This is useful to avoid co-linearity in the input matrix in some classifiers.
 Such functionality is useful, for example, when using non-regularized
 regression (:class:`LinearRegression <sklearn.linear_model.LinearRegression>`),
-since co-linearity would cause the covariance matrix to be non-invertible.
-When this parameter is not None, ``handle_unknown`` must be set to
-``error``::
+since co-linearity would cause the covariance matrix to be non-invertible::
 
     >>> X = [['male', 'from US', 'uses Safari'],
     ...      ['female', 'from Europe', 'uses Firefox']]
     >>> drop_enc = preprocessing.OneHotEncoder(drop='first').fit(X)
     >>> drop_enc.categories_
-    [array(['female', 'male'], dtype=object), array(['from Europe', 'from US'], dtype=object), array(['uses Firefox', 'uses Safari'], dtype=object)]
+    [array(['female', 'male'], dtype=object), array(['from Europe', 'from US'], dtype=object),
+     array(['uses Firefox', 'uses Safari'], dtype=object)]
     >>> drop_enc.transform(X).toarray()
     array([[1., 1., 1.],
            [0., 0., 0.]])
@@ -580,7 +663,8 @@ categories. In this case, you can set th
     ...      ['female', 'Asia', 'Chrome']]
     >>> drop_enc = preprocessing.OneHotEncoder(drop='if_binary').fit(X)
     >>> drop_enc.categories_
-    [array(['female', 'male'], dtype=object), array(['Asia', 'Europe', 'US'], dtype=object), array(['Chrome', 'Firefox', 'Safari'], dtype=object)]
+    [array(['female', 'male'], dtype=object), array(['Asia', 'Europe', 'US'], dtype=object),
+     array(['Chrome', 'Firefox', 'Safari'], dtype=object)]
     >>> drop_enc.transform(X).toarray()
     array([[1., 0., 0., 1., 0., 0., 1.],
            [0., 0., 1., 0., 0., 1., 0.],
@@ -590,9 +674,162 @@ In the transformed `X`, the first column
 categories "male"/"female", while the remaining 6 columns is the encoding of
 the 2 features with respectively 3 categories each.
 
+When `handle_unknown='ignore'` and `drop` is not None, unknown categories will
+be encoded as all zeros::
+
+    >>> drop_enc = preprocessing.OneHotEncoder(drop='first',
+    ...                                        handle_unknown='ignore').fit(X)
+    >>> X_test = [['unknown', 'America', 'IE']]
+    >>> drop_enc.transform(X_test).toarray()
+    array([[0., 0., 0., 0., 0.]])
+
+All the categories in `X_test` are unknown during transform and will be mapped
+to all zeros. This means that unknown categories will have the same mapping as
+the dropped category. :meth:`OneHotEncoder.inverse_transform` will map all zeros
+to the dropped category if a category is dropped and `None` if a category is
+not dropped::
+
+    >>> drop_enc = preprocessing.OneHotEncoder(drop='if_binary', sparse=False,
+    ...                                        handle_unknown='ignore').fit(X)
+    >>> X_test = [['unknown', 'America', 'IE']]
+    >>> X_trans = drop_enc.transform(X_test)
+    >>> X_trans
+    array([[0., 0., 0., 0., 0., 0., 0.]])
+    >>> drop_enc.inverse_transform(X_trans)
+    array([['female', None, None]], dtype=object)
+
+:class:`OneHotEncoder` supports categorical features with missing values by
+considering the missing values as an additional category::
+
+    >>> X = [['male', 'Safari'],
+    ...      ['female', None],
+    ...      [np.nan, 'Firefox']]
+    >>> enc = preprocessing.OneHotEncoder(handle_unknown='error').fit(X)
+    >>> enc.categories_
+    [array(['female', 'male', nan], dtype=object),
+     array(['Firefox', 'Safari', None], dtype=object)]
+    >>> enc.transform(X).toarray()
+    array([[0., 1., 0., 0., 1., 0.],
+           [1., 0., 0., 0., 0., 1.],
+           [0., 0., 1., 1., 0., 0.]])
+
+If a feature contains both `np.nan` and `None`, they will be considered
+separate categories::
+
+    >>> X = [['Safari'], [None], [np.nan], ['Firefox']]
+    >>> enc = preprocessing.OneHotEncoder(handle_unknown='error').fit(X)
+    >>> enc.categories_
+    [array(['Firefox', 'Safari', None, nan], dtype=object)]
+    >>> enc.transform(X).toarray()
+    array([[0., 1., 0., 0.],
+           [0., 0., 1., 0.],
+           [0., 0., 0., 1.],
+           [1., 0., 0., 0.]])
+
 See :ref:`dict_feature_extraction` for categorical features that are
 represented as a dict, not as scalars.
 
+.. _one_hot_encoder_infrequent_categories:
+
+Infrequent categories
+---------------------
+
+:class:`OneHotEncoder` supports aggregating infrequent categories into a single
+output for each feature. The parameters to enable the gathering of infrequent
+categories are `min_frequency` and `max_categories`.
+
+1. `min_frequency` is either an integer greater than or equal to 1, or a float in
+   the interval `(0.0, 1.0)`. If `min_frequency` is an integer, categories with
+   a cardinality smaller than `min_frequency` will be considered infrequent.
+   If `min_frequency` is a float, categories with a cardinality smaller than
+   this fraction of the total number of samples will be considered infrequent.
+   The default value is 1, which means every category is encoded separately.
+
+2. `max_categories` is either `None` or any integer greater than 1. This
+   parameter sets an upper limit to the number of output features for each
+   input feature. `max_categories` includes the feature that combines
+   infrequent categories.
+
+In the following example, the categories `'dog'` and `'snake'` are considered
+infrequent::
+
+   >>> X = np.array([['dog'] * 5 + ['cat'] * 20 + ['rabbit'] * 10 +
+   ...               ['snake'] * 3], dtype=object).T
+   >>> enc = preprocessing.OneHotEncoder(min_frequency=6, sparse=False).fit(X)
+   >>> enc.infrequent_categories_
+   [array(['dog', 'snake'], dtype=object)]
+   >>> enc.transform(np.array([['dog'], ['cat'], ['rabbit'], ['snake']]))
+   array([[0., 0., 1.],
+          [1., 0., 0.],
+          [0., 1., 0.],
+          [0., 0., 1.]])
+
+By setting handle_unknown to `'infrequent_if_exist'`, unknown categories will
+be considered infrequent::
+
+   >>> enc = preprocessing.OneHotEncoder(
+   ...    handle_unknown='infrequent_if_exist', sparse=False, min_frequency=6)
+   >>> enc = enc.fit(X)
+   >>> enc.transform(np.array([['dragon']]))
+   array([[0., 0., 1.]])
+
+:meth:`OneHotEncoder.get_feature_names_out` uses 'infrequent' as the infrequent
+feature name::
+
+   >>> enc.get_feature_names_out()
+   array(['x0_cat', 'x0_rabbit', 'x0_infrequent_sklearn'], dtype=object)
+
+When `'handle_unknown'` is set to `'infrequent_if_exist'` and an unknown
+category is encountered in transform:
+
+1. If infrequent category support was not configured or there was no
+   infrequent category during training, the resulting one-hot encoded columns
+   for this feature will be all zeros. In the inverse transform, an unknown
+   category will be denoted as `None`.
+
+2. If there is an infrequent category during training, the unknown category
+   will be considered infrequent. In the inverse transform, 'infrequent_sklearn'
+   will be used to represent the infrequent category.
+
+Infrequent categories can also be configured using `max_categories`. In the
+following example, we set `max_categories=2` to limit the number of features in
+the output. This will result in all but the `'cat'` category to be considered
+infrequent, leading to two features, one for `'cat'` and one for infrequent
+categories - which are all the others::
+
+   >>> enc = preprocessing.OneHotEncoder(max_categories=2, sparse=False)
+   >>> enc = enc.fit(X)
+   >>> enc.transform([['dog'], ['cat'], ['rabbit'], ['snake']])
+   array([[0., 1.],
+          [1., 0.],
+          [0., 1.],
+          [0., 1.]])
+
+If both `max_categories` and `min_frequency` are non-default values, then
+categories are selected based on `min_frequency` first and `max_categories`
+categories are kept. In the following example, `min_frequency=4` considers
+only `snake` to be infrequent, but `max_categories=3` forces `dog` to also be
+infrequent::
+
+   >>> enc = preprocessing.OneHotEncoder(min_frequency=4, max_categories=3, sparse=False)
+   >>> enc = enc.fit(X)
+   >>> enc.transform([['dog'], ['cat'], ['rabbit'], ['snake']])
+   array([[0., 0., 1.],
+          [1., 0., 0.],
+          [0., 1., 0.],
+          [0., 0., 1.]])
+
+If there are infrequent categories with the same cardinality at the cutoff of
+`max_categories`, then the first `max_categories` are taken based on lexicon
+ordering. In the following example, "b", "c", and "d", have the same cardinality
+and with `max_categories=3`, "b" and "c" are infrequent because they have a higher
+lexicon order.
+
+   >>> X = np.asarray([["a"] * 20 + ["b"] * 10 + ["c"] * 10 + ["d"] * 10], dtype=object).T
+   >>> enc = preprocessing.OneHotEncoder(max_categories=3).fit(X)
+   >>> enc.infrequent_categories_
+   [array(['b', 'c'], dtype=object)]
+
 .. _preprocessing_discretization:
 
 Discretization
@@ -606,7 +843,9 @@ of continuous attributes to one with onl
 
 One-hot encoded discretized features can make a model more expressive, while
 maintaining interpretability. For instance, pre-processing with a discretizer
-can introduce nonlinearity to linear models.
+can introduce nonlinearity to linear models. For more advanced possibilities,
+in particular smooth ones, see :ref:`generating_polynomial_features` further
+below.
 
 K-bins discretization
 ---------------------
@@ -637,7 +876,7 @@ Based on these bin intervals, ``X`` is t
          [ 2., 0., 0.]])
 
 The resulting dataset contains ordinal attributes which can be further used
-in a :class:`sklearn.pipeline.Pipeline`.
+in a :class:`~sklearn.pipeline.Pipeline`.
 
 Discretization is similar to constructing histograms for continuous data.
 However, histograms focus on counting features which fall into particular
@@ -649,6 +888,22 @@ constant-width bins. The 'quantile' stra
 equally populated bins in each feature. The 'kmeans' strategy defines bins based
 on a k-means clustering procedure performed on each feature independently.
 
+Be aware that one can specify custom bins by passing a callable defining the
+discretization strategy to :class:`~sklearn.preprocessing.FunctionTransformer`.
+For instance, we can use the Pandas function :func:`pandas.cut`::
+
+  >>> import pandas as pd
+  >>> import numpy as np
+  >>> bins = [0, 1, 13, 20, 60, np.inf]
+  >>> labels = ['infant', 'kid', 'teen', 'adult', 'senior citizen']
+  >>> transformer = preprocessing.FunctionTransformer(
+  ...     pd.cut, kw_args={'bins': bins, 'labels': labels, 'retbins': False}
+  ... )
+  >>> X = np.array([0.2, 2, 15, 25, 97])
+  >>> transformer.fit_transform(X)
+  ['infant', 'kid', 'teen', 'adult', 'senior citizen']
+  Categories (5, object): ['infant' < 'kid' < 'teen' < 'adult' < 'senior citizen']
+
 .. topic:: Examples:
 
   * :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization.py`
@@ -665,7 +920,7 @@ features to get boolean values**. This c
 probabilistic estimators that make assumption that the input data
 is distributed according to a multi-variate `Bernoulli distribution
 <https://en.wikipedia.org/wiki/Bernoulli_distribution>`_. For instance,
-this is the case for the :class:`sklearn.neural_network.BernoulliRBM`.
+this is the case for the :class:`~sklearn.neural_network.BernoulliRBM`.
 
 It is also common among the text processing community to use binary
 feature values (probably to simplify the probabilistic reasoning) even
@@ -674,7 +929,7 @@ often perform slightly better in practic
 
 As for the :class:`Normalizer`, the utility class
 :class:`Binarizer` is meant to be used in the early stages of
-:class:`sklearn.pipeline.Pipeline`. The ``fit`` method does nothing
+:class:`~sklearn.pipeline.Pipeline`. The ``fit`` method does nothing
 as each sample is treated independently of others::
 
   >>> X = [[ 1., -1.,  2.],
@@ -698,8 +953,8 @@ It is possible to adjust the threshold o
          [1., 0., 0.],
          [0., 0., 0.]])
 
-As for the :class:`StandardScaler` and :class:`Normalizer` classes, the
-preprocessing module provides a companion function :func:`binarize`
+As for the :class:`Normalizer` class, the preprocessing module
+provides a companion function :func:`binarize`
 to be used when the transformer API is not necessary.
 
 Note that the :class:`Binarizer` is similar to the :class:`KBinsDiscretizer`
@@ -722,12 +977,24 @@ Imputation of missing values
 
 Tools for imputing missing values are discussed at :ref:`impute`.
 
-.. _polynomial_features:
+.. _generating_polynomial_features:
 
 Generating polynomial features
 ==============================
 
-Often it's useful to add complexity to the model by considering nonlinear features of the input data. A simple and common method to use is polynomial features, which can get features' high-order and interaction terms. It is implemented in :class:`PolynomialFeatures`::
+Often it's useful to add complexity to a model by considering nonlinear
+features of the input data. We show two possibilities that are both based on
+polynomials: The first one uses pure polynomials, the second one uses splines,
+i.e. piecewise polynomials.
+
+.. _polynomial_features:
+
+Polynomial features
+-------------------
+
+A simple and common method to use is polynomial features, which can get
+features' high-order and interaction terms. It is implemented in
+:class:`PolynomialFeatures`::
 
     >>> import numpy as np
     >>> from sklearn.preprocessing import PolynomialFeatures
@@ -742,9 +1009,11 @@ Often it's useful to add complexity to t
            [ 1.,  2.,  3.,  4.,  6.,  9.],
            [ 1.,  4.,  5., 16., 20., 25.]])
 
-The features of X have been transformed from :math:`(X_1, X_2)` to :math:`(1, X_1, X_2, X_1^2, X_1X_2, X_2^2)`.
+The features of X have been transformed from :math:`(X_1, X_2)` to
+:math:`(1, X_1, X_2, X_1^2, X_1X_2, X_2^2)`.
 
-In some cases, only interaction terms among features are required, and it can be gotten with the setting ``interaction_only=True``::
+In some cases, only interaction terms among features are required, and they can
+be obtained with the setting ``interaction_only=True``::
 
     >>> X = np.arange(9).reshape(3, 3)
     >>> X
@@ -757,11 +1026,94 @@ In some cases, only interaction terms am
            [  1.,   3.,   4.,   5.,  12.,  15.,  20.,  60.],
            [  1.,   6.,   7.,   8.,  42.,  48.,  56., 336.]])
 
-The features of X have been transformed from :math:`(X_1, X_2, X_3)` to :math:`(1, X_1, X_2, X_3, X_1X_2, X_1X_3, X_2X_3, X_1X_2X_3)`.
+The features of X have been transformed from :math:`(X_1, X_2, X_3)` to
+:math:`(1, X_1, X_2, X_3, X_1X_2, X_1X_3, X_2X_3, X_1X_2X_3)`.
+
+Note that polynomial features are used implicitly in `kernel methods
+<https://en.wikipedia.org/wiki/Kernel_method>`_ (e.g., :class:`~sklearn.svm.SVC`,
+:class:`~sklearn.decomposition.KernelPCA`) when using polynomial :ref:`svm_kernels`.
+
+See :ref:`sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py`
+for Ridge regression using created polynomial features.
+
+.. _spline_transformer:
+
+Spline transformer
+------------------
+
+Another way to add nonlinear terms instead of pure polynomials of features is
+to generate spline basis functions for each feature with the
+:class:`SplineTransformer`. Splines are piecewise polynomials, parametrized by
+their polynomial degree and the positions of the knots. The
+:class:`SplineTransformer` implements a B-spline basis, cf. the references
+below.
+
+.. note::
+
+    The :class:`SplineTransformer` treats each feature separately, i.e. it
+    won't give you interaction terms.
+
+Some of the advantages of splines over polynomials are:
+
+    - B-splines are very flexible and robust if you keep a fixed low degree,
+      usually 3, and parsimoniously adapt the number of knots. Polynomials
+      would need a higher degree, which leads to the next point.
+    - B-splines do not have oscillatory behaviour at the boundaries as have
+      polynomials (the higher the degree, the worse). This is known as `Runge's
+      phenomenon <https://en.wikipedia.org/wiki/Runge%27s_phenomenon>`_.
+    - B-splines provide good options for extrapolation beyond the boundaries,
+      i.e. beyond the range of fitted values. Have a look at the option
+      ``extrapolation``.
+    - B-splines generate a feature matrix with a banded structure. For a single
+      feature, every row contains only ``degree + 1`` non-zero elements, which
+      occur consecutively and are even positive. This results in a matrix with
+      good numerical properties, e.g. a low condition number, in sharp contrast
+      to a matrix of polynomials, which goes under the name
+      `Vandermonde matrix <https://en.wikipedia.org/wiki/Vandermonde_matrix>`_.
+      A low condition number is important for stable algorithms of linear
+      models.
+
+The following code snippet shows splines in action::
+
+    >>> import numpy as np
+    >>> from sklearn.preprocessing import SplineTransformer
+    >>> X = np.arange(5).reshape(5, 1)
+    >>> X
+    array([[0],
+           [1],
+           [2],
+           [3],
+           [4]])
+    >>> spline = SplineTransformer(degree=2, n_knots=3)
+    >>> spline.fit_transform(X)
+    array([[0.5  , 0.5  , 0.   , 0.   ],
+           [0.125, 0.75 , 0.125, 0.   ],
+           [0.   , 0.5  , 0.5  , 0.   ],
+           [0.   , 0.125, 0.75 , 0.125],
+           [0.   , 0.   , 0.5  , 0.5  ]])
+
+As ``X`` is sorted, one can easily see the banded matrix output. Only the
+three middle diagonals are non-zero for ``degree=2``. The higher the degree,
+the more overlapping of the splines.
+
+Interestingly, a :class:`SplineTransformer` of ``degree=0`` is the same as
+:class:`~sklearn.preprocessing.KBinsDiscretizer` with
+``encode='onehot-dense'`` and ``n_bins = n_knots - 1`` if
+``knots = strategy``.
+
+.. topic:: Examples:
+
+    * :ref:`sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py`
+    * :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`
+
+.. topic:: References:
 
-Note that polynomial features are used implicitly in `kernel methods <https://en.wikipedia.org/wiki/Kernel_method>`_ (e.g., :class:`sklearn.svm.SVC`, :class:`sklearn.decomposition.KernelPCA`) when using polynomial :ref:`svm_kernels`.
+    * Eilers, P., & Marx, B. (1996). :doi:`Flexible Smoothing with B-splines and
+      Penalties <10.1214/ss/1038425655>`. Statist. Sci. 11 (1996), no. 2, 89--121.
 
-See :ref:`sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py` for Ridge regression using created polynomial features.
+    * Perperoglou, A., Sauerbrei, W., Abrahamowicz, M. et al. :doi:`A review of
+      spline function procedures in R <10.1186/s12874-019-0666-3>`.
+      BMC Med Res Methodol 19, 46 (2019).
 
 .. _function_transformer:
 
@@ -777,6 +1129,7 @@ a transformer that applies a log transfo
     >>> from sklearn.preprocessing import FunctionTransformer
     >>> transformer = FunctionTransformer(np.log1p, validate=True)
     >>> X = np.array([[0, 1], [2, 3]])
+    >>> # Since FunctionTransformer is no-op during fit, we can call transform directly
     >>> transformer.transform(X)
     array([[0.        , 0.69314718],
            [1.09861229, 1.38629436]])
@@ -791,5 +1144,6 @@ error with a ``filterwarnings``::
   ...                         category=UserWarning, append=False)
 
 For a full code example that demonstrates using a :class:`FunctionTransformer`
-to do custom feature selection,
-see :ref:`sphx_glr_auto_examples_preprocessing_plot_function_transformer.py`
+to extract features from text data see
+:ref:`sphx_glr_auto_examples_compose_plot_column_transformer.py` and
+:ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`.
diff -pruN 0.23.2-5/doc/modules/preprocessing_targets.rst 1.1.1-1/doc/modules/preprocessing_targets.rst
--- 0.23.2-5/doc/modules/preprocessing_targets.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/preprocessing_targets.rst	2022-05-19 12:16:26.456783000 +0000
@@ -12,10 +12,13 @@ you want to transform the prediction tar
 model in the original (untransformed) space.
 
 Label binarization
-------------------
+==================
 
-:class:`LabelBinarizer` is a utility class to help create a label indicator
-matrix from a list of multi-class labels::
+LabelBinarizer
+--------------
+
+:class:`LabelBinarizer` is a utility class to help create a :term:`label
+indicator matrix` from a list of :term:`multiclass` labels::
 
     >>> from sklearn import preprocessing
     >>> lb = preprocessing.LabelBinarizer()
@@ -27,17 +30,47 @@ matrix from a list of multi-class labels
     array([[1, 0, 0, 0],
            [0, 0, 0, 1]])
 
-For multiple labels per instance, use :class:`MultiLabelBinarizer`::
+Using this format can enable multiclass classification in estimators
+that support the label indicator matrix format.
 
-    >>> lb = preprocessing.MultiLabelBinarizer()
-    >>> lb.fit_transform([(1, 2), (3,)])
-    array([[1, 1, 0],
-           [0, 0, 1]])
-    >>> lb.classes_
-    array([1, 2, 3])
+.. warning::
+
+    LabelBinarizer is not needed if you are using an estimator that
+    already supports :term:`multiclass` data.
+
+For more information about multiclass classification, refer to
+:ref:`multiclass_classification`.
+
+MultiLabelBinarizer
+-------------------
+
+In :term:`multilabel` learning, the joint set of binary classification tasks is
+expressed with a label binary indicator array: each sample is one row of a 2d
+array of shape (n_samples, n_classes) with binary values where the ones, i.e. the
+non-zero elements, correspond to the subset of labels for that sample. An array
+such as ``np.array([[1, 0, 0], [0, 1, 1], [0, 0, 0]])`` represents label 0 in the
+first sample, labels 1 and 2 in the second sample, and no labels in the third
+sample.
+
+Producing multilabel data as a list of sets of labels may be more intuitive.
+The :class:`MultiLabelBinarizer <sklearn.preprocessing.MultiLabelBinarizer>`
+transformer can be used to convert between a collection of collections of
+labels and the indicator format::
+
+    >>> from sklearn.preprocessing import MultiLabelBinarizer
+    >>> y = [[2, 3, 4], [2], [0, 1, 3], [0, 1, 2, 3, 4], [0, 1, 2]]
+    >>> MultiLabelBinarizer().fit_transform(y)
+    array([[0, 0, 1, 1, 1],
+           [0, 0, 1, 0, 0],
+           [1, 1, 0, 1, 0],
+           [1, 1, 1, 1, 1],
+           [1, 1, 1, 0, 0]])
+
+For more information about multilabel classification, refer to
+:ref:`multilabel_classification`.
 
 Label encoding
---------------
+==============
 
 :class:`LabelEncoder` is a utility class to help normalize labels such that
 they contain only values between 0 and n_classes-1. This is sometimes useful
diff -pruN 0.23.2-5/doc/modules/random_projection.rst 1.1.1-1/doc/modules/random_projection.rst
--- 0.23.2-5/doc/modules/random_projection.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/random_projection.rst	2022-05-19 12:16:26.456783000 +0000
@@ -52,7 +52,7 @@ The main theoretical result behind the e
   and can even be taken to be an orthogonal projection.
 
 Knowing only the number of samples, the
-:func:`sklearn.random_projection.johnson_lindenstrauss_min_dim` estimates
+:func:`johnson_lindenstrauss_min_dim` estimates
 conservatively the minimal size of the random subspace to guarantee a
 bounded distortion introduced by the random projection::
 
@@ -90,7 +90,7 @@ bounded distortion introduced by the ran
 
 Gaussian random projection
 ==========================
-The :class:`sklearn.random_projection.GaussianRandomProjection` reduces the
+The :class:`GaussianRandomProjection` reduces the
 dimensionality by projecting the original input space on a randomly generated
 matrix where components are drawn from the following distribution
 :math:`N(0, \frac{1}{n_{components}})`.
@@ -111,7 +111,7 @@ projection transformer::
 
 Sparse random projection
 ========================
-The :class:`sklearn.random_projection.SparseRandomProjection` reduces the
+The :class:`SparseRandomProjection` reduces the
 dimensionality by projecting the original input space using a sparse
 random matrix.
 
@@ -152,7 +152,7 @@ projection transformer::
 
  * D. Achlioptas. 2003.
    `Database-friendly random projections: Johnson-Lindenstrauss  with binary
-   coins <http://www.cs.ucsc.edu/~optas/papers/jl.pdf>`_.
+   coins <https://www.sciencedirect.com/science/article/pii/S0022000003000254>`_.
    Journal of Computer and System Sciences 66 (2003) 671–687
 
  * Ping Li, Trevor J. Hastie, and Kenneth W. Church. 2006.
@@ -160,3 +160,42 @@ projection transformer::
    In Proceedings of the 12th ACM SIGKDD international conference on
    Knowledge discovery and data mining (KDD '06). ACM, New York, NY, USA,
    287-296.
+
+
+.. _random_projection_inverse_transform:
+
+Inverse Transform
+=================
+The random projection transformers have a ``compute_inverse_components`` parameter. When
+set to True, after creating the random ``components_`` matrix during fitting,
+the transformer computes the pseudo-inverse of this matrix and stores it as
+``inverse_components_``. The ``inverse_components_`` matrix has shape
+:math:`n_{features} \times n_{components}`, and it is always a dense matrix,
+regardless of whether the components matrix is sparse or dense. So depending on
+the number of features and components, it may use a lot of memory.
+
+When the ``inverse_transform`` method is called, it computes the product of the
+input ``X`` and the transpose of the inverse components. If the inverse components have
+been computed during fit, they are reused at each call to ``inverse_transform``.
+Otherwise they are recomputed each time, which can be costly. The result is always
+dense, even if ``X`` is sparse.
+
+Here a small code example which illustrates how to use the inverse transform
+feature::
+
+  >>> import numpy as np
+  >>> from sklearn.random_projection import SparseRandomProjection
+  >>> X = np.random.rand(100, 10000)
+  >>> transformer = SparseRandomProjection(
+  ...   compute_inverse_components=True
+  ... )
+  ...
+  >>> X_new = transformer.fit_transform(X)
+  >>> X_new.shape
+  (100, 3947)
+  >>> X_new_inversed = transformer.inverse_transform(X_new)
+  >>> X_new_inversed.shape
+  (100, 10000)
+  >>> X_new_again = transformer.transform(X_new_inversed)
+  >>> np.allclose(X_new, X_new_again)
+  True
diff -pruN 0.23.2-5/doc/modules/semi_supervised.rst 1.1.1-1/doc/modules/semi_supervised.rst
--- 0.23.2-5/doc/modules/semi_supervised.rst	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/modules/semi_supervised.rst	2022-05-19 12:16:26.456783000 +0000
@@ -0,0 +1,151 @@
+.. _semi_supervised:
+
+===================================================
+Semi-supervised learning
+===================================================
+
+.. currentmodule:: sklearn.semi_supervised
+
+`Semi-supervised learning
+<https://en.wikipedia.org/wiki/Semi-supervised_learning>`_ is a situation
+in which some of the samples in your training data are not labeled. The
+semi-supervised estimators in :mod:`sklearn.semi_supervised` are able to
+make use of this additional unlabeled data to better capture the shape of
+the underlying data distribution and generalize better to new samples.
+These algorithms can perform well when we have a very small amount of
+labeled points and a large amount of unlabeled points.
+
+.. topic:: Unlabeled entries in `y`
+
+   It is important to assign an identifier to unlabeled points along with the
+   labeled data when training the model with the ``fit`` method. The
+   identifier that this implementation uses is the integer value :math:`-1`.
+   Note that for string labels, the dtype of `y` should be object so that it
+   can contain both strings and integers.
+
+.. note::
+
+   Semi-supervised algorithms need to make assumptions about the distribution
+   of the dataset in order to achieve performance gains. See `here
+   <https://en.wikipedia.org/wiki/Semi-supervised_learning#Assumptions>`_
+   for more details.
+
+.. _self_training:
+
+Self Training
+=============
+
+This self-training implementation is based on Yarowsky's [1]_ algorithm. Using
+this algorithm, a given supervised classifier can function as a semi-supervised
+classifier, allowing it to learn from unlabeled data.
+
+:class:`SelfTrainingClassifier` can be called with any classifier that
+implements `predict_proba`, passed as the parameter `base_classifier`. In
+each iteration, the `base_classifier` predicts labels for the unlabeled
+samples and adds a subset of these labels to the labeled dataset.
+
+The choice of this subset is determined by the selection criterion. This
+selection can be done using a `threshold` on the prediction probabilities, or
+by choosing the `k_best` samples according to the prediction probabilities.
+
+The labels used for the final fit as well as the iteration in which each sample
+was labeled are available as attributes. The optional `max_iter` parameter
+specifies how many times the loop is executed at most.
+
+The `max_iter` parameter may be set to `None`, causing the algorithm to iterate
+until all samples have labels or no new samples are selected in that iteration.
+
+.. note::
+
+   When using the self-training classifier, the
+   :ref:`calibration <calibration>` of the classifier is important.
+
+.. topic:: Examples
+
+  * :ref:`sphx_glr_auto_examples_semi_supervised_plot_self_training_varying_threshold.py`
+  * :ref:`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_versus_svm_iris.py`
+
+.. topic:: References
+
+    .. [1] :doi:`"Unsupervised word sense disambiguation rivaling supervised methods"
+       <10.3115/981658.981684>`
+       David Yarowsky, Proceedings of the 33rd annual meeting on Association for
+       Computational Linguistics (ACL '95). Association for Computational Linguistics,
+       Stroudsburg, PA, USA, 189-196.
+
+.. _label_propagation:
+
+Label Propagation
+=================
+
+Label propagation denotes a few variations of semi-supervised graph
+inference algorithms.
+
+A few features available in this model:
+  * Used for classification tasks
+  * Kernel methods to project data into alternate dimensional spaces
+
+`scikit-learn` provides two label propagation models:
+:class:`LabelPropagation` and :class:`LabelSpreading`. Both work by
+constructing a similarity graph over all items in the input dataset.
+
+.. figure:: ../auto_examples/semi_supervised/images/sphx_glr_plot_label_propagation_structure_001.png
+    :target: ../auto_examples/semi_supervised/plot_label_propagation_structure.html
+    :align: center
+    :scale: 60%
+
+    **An illustration of label-propagation:** *the structure of unlabeled
+    observations is consistent with the class structure, and thus the
+    class label can be propagated to the unlabeled observations of the
+    training set.*
+
+:class:`LabelPropagation` and :class:`LabelSpreading`
+differ in modifications to the similarity matrix of the graph and the
+clamping effect on the label distributions.
+Clamping allows the algorithm to change the weight of the ground truth labeled
+data to some degree. The :class:`LabelPropagation` algorithm performs hard
+clamping of input labels, which means :math:`\alpha=0`. This clamping factor
+can be relaxed, to say :math:`\alpha=0.2`, which means that we will always
+retain 80 percent of our original label distribution, but the algorithm gets to
+change its confidence of the distribution within 20 percent.
+
+:class:`LabelPropagation` uses the raw similarity matrix constructed from
+the data with no modifications. In contrast, :class:`LabelSpreading`
+minimizes a loss function that has regularization properties, as such it
+is often more robust to noise. The algorithm iterates on a modified
+version of the original graph and normalizes the edge weights by
+computing the normalized graph Laplacian matrix. This procedure is also
+used in :ref:`spectral_clustering`.
+
+Label propagation models have two built-in kernel methods. Choice of kernel
+effects both scalability and performance of the algorithms. The following are
+available:
+
+  * rbf (:math:`\exp(-\gamma |x-y|^2), \gamma > 0`). :math:`\gamma` is
+    specified by keyword gamma.
+
+  * knn (:math:`1[x' \in kNN(x)]`). :math:`k` is specified by keyword
+    n_neighbors.
+
+The RBF kernel will produce a fully connected graph which is represented in memory
+by a dense matrix. This matrix may be very large and combined with the cost of
+performing a full matrix multiplication calculation for each iteration of the
+algorithm can lead to prohibitively long running times. On the other hand,
+the KNN kernel will produce a much more memory-friendly sparse matrix
+which can drastically reduce running times.
+
+.. topic:: Examples
+
+  * :ref:`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_versus_svm_iris.py`
+  * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_structure.py`
+  * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits.py`
+  * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits_active_learning.py`
+
+.. topic:: References
+
+    [2] Yoshua Bengio, Olivier Delalleau, Nicolas Le Roux. In Semi-Supervised
+    Learning (2006), pp. 193-216
+
+    [3] Olivier Delalleau, Yoshua Bengio, Nicolas Le Roux. Efficient
+    Non-Parametric Function Induction in Semi-Supervised Learning. AISTAT 2005
+    https://research.microsoft.com/en-us/people/nicolasl/efficient_ssl.pdf
diff -pruN 0.23.2-5/doc/modules/sgd.rst 1.1.1-1/doc/modules/sgd.rst
--- 0.23.2-5/doc/modules/sgd.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/sgd.rst	2022-05-19 12:16:26.456783000 +0000
@@ -26,11 +26,11 @@ correspond to a specific family of machi
 *way* to train a model. Often, an instance of :class:`SGDClassifier` or
 :class:`SGDRegressor` will have an equivalent estimator in
 the scikit-learn API, potentially using a different optimization technique.
-For example, using `SGDClassifier(loss='log')` results in logistic regression,
+For example, using `SGDClassifier(loss='log_loss')` results in logistic regression,
 i.e. a model equivalent to :class:`~sklearn.linear_model.LogisticRegression`
 which is fitted via SGD instead of being fitted by one of the other solvers
 in :class:`~sklearn.linear_model.LogisticRegression`. Similarly,
-`SGDRegressor(loss='squared_loss', penalty='l2')` and
+`SGDRegressor(loss='squared_error', penalty='l2')` and
 :class:`~sklearn.linear_model.Ridge` solve the same optimization problem, via
 different means.
 
@@ -113,7 +113,7 @@ parameter. :class:`SGDClassifier` suppor
 
   * ``loss="hinge"``: (soft-margin) linear Support Vector Machine,
   * ``loss="modified_huber"``: smoothed hinge loss,
-  * ``loss="log"``: logistic regression,
+  * ``loss="log_loss"``: logistic regression,
   * and all regression losses below. In this case the target is encoded as -1
     or 1, and the problem is treated as a regression problem. The predicted
     class then correspond to the sign of the predicted target.
@@ -123,14 +123,14 @@ Please refer to the :ref:`mathematical s
 The first two loss functions are lazy, they only update the model
 parameters if an example violates the margin constraint, which makes
 training very efficient and may result in sparser models (i.e. with more zero
-coefficents), even when L2 penalty is used.
+coefficients), even when L2 penalty is used.
 
-Using ``loss="log"`` or ``loss="modified_huber"`` enables the
+Using ``loss="log_loss"`` or ``loss="modified_huber"`` enables the
 ``predict_proba`` method, which gives a vector of probability estimates
 :math:`P(y|x)` per sample :math:`x`::
 
-    >>> clf = SGDClassifier(loss="log", max_iter=5).fit(X, y)
-    >>> clf.predict_proba([[1., 1.]])
+    >>> clf = SGDClassifier(loss="log_loss", max_iter=5).fit(X, y)
+    >>> clf.predict_proba([[1., 1.]]) # doctest: +SKIP
     array([[0.00..., 0.99...]])
 
 The concrete penalty can be set via the ``penalty`` parameter.
@@ -168,7 +168,7 @@ one-dimensional array of shape (n_classe
 the weight vector of the OVA classifier for the i-th class; classes are
 indexed in ascending order (see attribute ``classes_``).
 Note that, in principle, since they allow to create a probability model,
-``loss="log"`` and ``loss="modified_huber"`` are more suitable for
+``loss="log_loss"`` and ``loss="modified_huber"`` are more suitable for
 one-vs-all classification.
 
 :class:`SGDClassifier` supports both weighted classes and weighted
@@ -211,7 +211,7 @@ samples (> 10.000), for other problems w
 The concrete loss function can be set via the ``loss``
 parameter. :class:`SGDRegressor` supports the following loss functions:
 
-  * ``loss="squared_loss"``: Ordinary least squares,
+  * ``loss="squared_error"``: Ordinary least squares,
   * ``loss="huber"``: Huber loss for robust regression,
   * ``loss="epsilon_insensitive"``: linear Support Vector Regression.
 
@@ -232,6 +232,58 @@ For regression with a squared loss and a
 SGD with an averaging strategy is available with Stochastic Average
 Gradient (SAG) algorithm, available as a solver in :class:`Ridge`.
 
+.. _sgd_online_one_class_svm:
+
+Online One-Class SVM
+====================
+
+The class :class:`sklearn.linear_model.SGDOneClassSVM` implements an online
+linear version of the One-Class SVM using a stochastic gradient descent.
+Combined with kernel approximation techniques,
+:class:`sklearn.linear_model.SGDOneClassSVM` can be used to approximate the
+solution of a kernelized One-Class SVM, implemented in
+:class:`sklearn.svm.OneClassSVM`, with a linear complexity in the number of
+samples. Note that the complexity of a kernelized One-Class SVM is at best
+quadratic in the number of samples.
+:class:`sklearn.linear_model.SGDOneClassSVM` is thus well suited for datasets
+with a large number of training samples (> 10,000) for which the SGD
+variant can be several orders of magnitude faster.
+
+Its implementation is based on the implementation of the stochastic
+gradient descent. Indeed, the original optimization problem of the One-Class
+SVM is given by
+
+.. math::
+
+  \begin{aligned}
+  \min_{w, \rho, \xi} & \quad \frac{1}{2}\Vert w \Vert^2 - \rho + \frac{1}{\nu n} \sum_{i=1}^n \xi_i \\
+  \text{s.t.} & \quad \langle w, x_i \rangle \geq \rho - \xi_i \quad 1 \leq i \leq n \\
+  & \quad \xi_i \geq 0 \quad 1 \leq i \leq n
+  \end{aligned}
+
+where :math:`\nu \in (0, 1]` is the user-specified parameter controlling the
+proportion of outliers and the proportion of support vectors. Getting rid of
+the slack variables :math:`\xi_i` this problem is equivalent to
+
+.. math::
+
+  \min_{w, \rho} \frac{1}{2}\Vert w \Vert^2 - \rho + \frac{1}{\nu n} \sum_{i=1}^n \max(0, \rho - \langle w, x_i \rangle) \, .
+
+Multiplying by the constant :math:`\nu` and introducing the intercept
+:math:`b = 1 - \rho` we obtain the following equivalent optimization problem
+
+.. math::
+
+  \min_{w, b} \frac{\nu}{2}\Vert w \Vert^2 + b\nu + \frac{1}{n} \sum_{i=1}^n \max(0, 1 - (\langle w, x_i \rangle + b)) \, .
+
+This is similar to the optimization problems studied in section
+:ref:`sgd_mathematical_formulation` with :math:`y_i = 1, 1 \leq i \leq n` and
+:math:`\alpha = \nu/2`, :math:`L` being the hinge loss function and :math:`R`
+being the L2 norm. We just need to add the term :math:`b\nu` in the
+optimization loop.
+
+As :class:`SGDClassifier` and :class:`SGDRegressor`, :class:`SGDOneClassSVM`
+supports averaged SGD. Averaging can be enabled by setting ``average=True``.
 
 Stochastic Gradient Descent for sparse data
 ===========================================
@@ -356,20 +408,20 @@ parameters, we minimize the regularized
 where :math:`L` is a loss function that measures model (mis)fit and
 :math:`R` is a regularization term (aka penalty) that penalizes model
 complexity; :math:`\alpha > 0` is a non-negative hyperparameter that controls
-the regularization stength.
+the regularization strength.
 
 Different choices for :math:`L` entail different classifiers or regressors:
 
 - Hinge (soft-margin): equivalent to Support Vector Classification.
   :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))`.
-- Perceptron: 
+- Perceptron:
   :math:`L(y_i, f(x_i)) = \max(0, - y_i f(x_i))`.
-- Modified Huber: 
+- Modified Huber:
   :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))^2` if :math:`y_i f(x_i) >
   1`, and :math:`L(y_i, f(x_i)) = -4 y_i f(x_i)` otherwise.
-- Log: equivalent to Logistic Regression.
+- Log Loss: equivalent to Logistic Regression.
   :math:`L(y_i, f(x_i)) = \log(1 + \exp (-y_i f(x_i)))`.
-- Least-Squares: Linear regression (Ridge or Lasso depending on
+- Squared Error: Linear regression (Ridge or Lasso depending on
   :math:`R`).
   :math:`L(y_i, f(x_i)) = \frac{1}{2}(y_i - f(x_i))^2`.
 - Huber: less sensitive to outliers than least-squares. It is equivalent to
@@ -494,8 +546,8 @@ The code is written in Cython.
    .. [#1] `"Stochastic Gradient Descent"
        <https://leon.bottou.org/projects/sgd>`_ L. Bottou - Website, 2010.
 
-   .. [#2] `"Pegasos: Primal estimated sub-gradient solver for svm"
-      <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.74.8513>`_
+   .. [#2] :doi:`"Pegasos: Primal estimated sub-gradient solver for svm"
+      <10.1145/1273496.1273598>`
       S. Shalev-Shwartz, Y. Singer, N. Srebro - In Proceedings of ICML '07.
 
    .. [#3] `"Stochastic gradient descent training for l1-regularized
@@ -504,17 +556,17 @@ The code is written in Cython.
       Y. Tsuruoka, J. Tsujii, S. Ananiadou - In Proceedings of the AFNLP/ACL
       '09.
 
-   .. [#4] `"Towards Optimal One Pass Large Scale Learning with
+   .. [#4] :arxiv:`"Towards Optimal One Pass Large Scale Learning with
       Averaged Stochastic Gradient Descent"
-      <https://arxiv.org/pdf/1107.2490v2.pdf>`_
-      Xu, Wei
+      <1107.2490v2>`
+      Xu, Wei (2011)
 
-   .. [#5] `"Regularization and variable selection via the elastic net"
-      <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.124.4696>`_
+   .. [#5] :doi:`"Regularization and variable selection via the elastic net"
+      <10.1111/j.1467-9868.2005.00503.x>`
       H. Zou, T. Hastie - Journal of the Royal Statistical Society Series B,
       67 (2), 301-320.
 
-   .. [#6] `"Solving large scale linear prediction problems using stochastic
+   .. [#6] :doi:`"Solving large scale linear prediction problems using stochastic
       gradient descent algorithms"
-      <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.58.7377>`_
+      <10.1145/1015330.1015332>`
       T. Zhang - In Proceedings of ICML '04.
diff -pruN 0.23.2-5/doc/modules/svm.rst 1.1.1-1/doc/modules/svm.rst
--- 0.23.2-5/doc/modules/svm.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/svm.rst	2022-05-19 12:16:26.456783000 +0000
@@ -175,9 +175,11 @@ The shape of ``dual_coef_`` is ``(n_clas
 a somewhat hard to grasp layout.
 The columns correspond to the support vectors involved in any
 of the ``n_classes * (n_classes - 1) / 2`` "one-vs-one" classifiers.
-Each of the support vectors is used in ``n_classes - 1`` classifiers.
-The ``n_classes - 1`` entries in each row correspond to the dual coefficients
-for these classifiers.
+Each support vector ``v`` has a dual coefficient in each of the
+``n_classes - 1`` classifiers comparing the class of ``v`` against another class.
+Note that some, but not all, of these dual coefficients may be zero.
+The ``n_classes - 1`` entries in each column are these dual coefficients,
+ordered by the opposing class.
 
 This might be clearer with an example: consider a three class problem with
 class 0 having three support vectors
@@ -188,21 +190,14 @@ the coefficient of support vector :math:
 classes :math:`i` and :math:`k` :math:`\alpha^{j}_{i,k}`.
 Then ``dual_coef_`` looks like this:
 
-+------------------------+------------------------+------------------+
-|:math:`\alpha^{0}_{0,1}`|:math:`\alpha^{0}_{0,2}`|Coefficients      |
-+------------------------+------------------------+for SVs of class 0|
-|:math:`\alpha^{1}_{0,1}`|:math:`\alpha^{1}_{0,2}`|                  |
-+------------------------+------------------------+                  |
-|:math:`\alpha^{2}_{0,1}`|:math:`\alpha^{2}_{0,2}`|                  |
-+------------------------+------------------------+------------------+
-|:math:`\alpha^{0}_{1,0}`|:math:`\alpha^{0}_{1,2}`|Coefficients      |
-+------------------------+------------------------+for SVs of class 1|
-|:math:`\alpha^{1}_{1,0}`|:math:`\alpha^{1}_{1,2}`|                  |
-+------------------------+------------------------+------------------+
-|:math:`\alpha^{0}_{2,0}`|:math:`\alpha^{0}_{2,1}`|Coefficients      |
-+------------------------+------------------------+for SVs of class 2|
-|:math:`\alpha^{1}_{2,0}`|:math:`\alpha^{1}_{2,1}`|                  |
-+------------------------+------------------------+------------------+
++------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+
+|:math:`\alpha^{0}_{0,1}`|:math:`\alpha^{1}_{0,1}`|:math:`\alpha^{2}_{0,1}`|:math:`\alpha^{0}_{1,0}`|:math:`\alpha^{1}_{1,0}`|:math:`\alpha^{0}_{2,0}`|:math:`\alpha^{1}_{2,0}`|
++------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+
+|:math:`\alpha^{0}_{0,2}`|:math:`\alpha^{1}_{0,2}`|:math:`\alpha^{2}_{0,2}`|:math:`\alpha^{0}_{1,2}`|:math:`\alpha^{1}_{1,2}`|:math:`\alpha^{0}_{2,1}`|:math:`\alpha^{1}_{2,1}`|
++------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+
+|Coefficients                                                              |Coefficients                                     |Coefficients                                     |
+|for SVs of class 0                                                        |for SVs of class 1                               |for SVs of class 2                               |
++--------------------------------------------------------------------------+-------------------------------------------------+-------------------------------------------------+
 
 .. topic:: Examples:
 
@@ -397,10 +392,10 @@ Tips on Practical Use
   * **Setting C**: ``C`` is ``1`` by default and it's a reasonable default
     choice.  If you have a lot of noisy observations you should decrease it:
     decreasing C corresponds to more regularization.
-    
+
     :class:`LinearSVC` and :class:`LinearSVR` are less sensitive to ``C`` when
-    it becomes large, and prediction results stop improving after a certain 
-    threshold. Meanwhile, larger ``C`` values will take more time to train, 
+    it becomes large, and prediction results stop improving after a certain
+    threshold. Meanwhile, larger ``C`` values will take more time to train,
     sometimes up to 10 times longer, as shown in [#3]_.
 
   * Support Vector Machine algorithms are not scale invariant, so **it
@@ -415,10 +410,10 @@ Tips on Practical Use
         >>> from sklearn.svm import SVC
 
         >>> clf = make_pipeline(StandardScaler(), SVC())
-    
+
     See section :ref:`preprocessing` for more details on scaling and
     normalization.
-  
+
   .. _shrinking_svm:
 
   * Regarding the `shrinking` parameter, quoting [#4]_: *We found that if the
@@ -434,7 +429,7 @@ Tips on Practical Use
     positive and few negative), set ``class_weight='balanced'`` and/or try
     different penalty parameters ``C``.
 
-  * **Randomness of the underlying implementations**: The underlying 
+  * **Randomness of the underlying implementations**: The underlying
     implementations of :class:`SVC` and :class:`NuSVC` use a random number
     generator only to shuffle the data for probability estimation (when
     ``probability`` is set to ``True``). This randomness can be controlled
@@ -453,7 +448,7 @@ Tips on Practical Use
     set to ``False`` the underlying implementation of :class:`LinearSVC` is
     not random and ``random_state`` has no effect on the results.
 
-  * Using L1 penalization as provided by ``LinearSVC(loss='l2', penalty='l1',
+  * Using L1 penalization as provided by ``LinearSVC(penalty='l1',
     dual=False)`` yields a sparse solution, i.e. only a subset of feature
     weights is different from zero and contribute to the decision function.
     Increasing ``C`` yields a more complex model (more features are selected).
@@ -500,7 +495,7 @@ correctly.  ``gamma`` defines how much i
 The larger ``gamma`` is, the closer other examples must be to be affected.
 
 Proper choice of ``C`` and ``gamma`` is critical to the SVM's performance.  One
-is advised to use :class:`sklearn.model_selection.GridSearchCV` with 
+is advised to use :class:`~sklearn.model_selection.GridSearchCV` with
 ``C`` and ``gamma`` spaced exponentially far apart to choose good values.
 
 .. topic:: Examples:
@@ -560,7 +555,7 @@ test vectors must be provided:
 
     >>> import numpy as np
     >>> from sklearn.datasets import make_classification
-    >>> from sklearn.model_selection import train_test_split 
+    >>> from sklearn.model_selection import train_test_split
     >>> from sklearn import svm
     >>> X, y = make_classification(n_samples=10, random_state=0)
     >>> X_train , X_test , y_train, y_test = train_test_split(X, y, random_state=0)
@@ -623,7 +618,7 @@ misclassified or within the margin bound
 (w^T \phi (x_i) + b)` would be :math:`\geq 1` for all samples, which
 indicates a perfect prediction. But problems are usually not always perfectly
 separable with a hyperplane, so we allow some samples to be at a distance :math:`\zeta_i` from
-their correct margin boundary. The penalty term `C` controls the strengh of
+their correct margin boundary. The penalty term `C` controls the strength of
 this penalty, and as a result, acts as an inverse regularization parameter
 (see note below).
 
@@ -667,7 +662,7 @@ term :math:`b`
     regularization parameter, most other estimators use ``alpha``. The exact
     equivalence between the amount of regularization of two models depends on
     the exact objective function optimized by the model. For example, when the
-    estimator used is :class:`sklearn.linear_model.Ridge <ridge>` regression,
+    estimator used is :class:`~sklearn.linear_model.Ridge` regression,
     the relation between them is given as :math:`C = \frac{1}{alpha}`.
 
 LinearSVC
@@ -677,7 +672,7 @@ The primal problem can be equivalently f
 
 .. math::
 
-    \min_ {w, b} \frac{1}{2} w^T w + C \sum_{i=1}\max(0, y_i (w^T \phi(x_i) + b)),
+    \min_ {w, b} \frac{1}{2} w^T w + C \sum_{i=1}^{n}\max(0, 1 - y_i (w^T \phi(x_i) + b)),
 
 where we make use of the `hinge loss
 <https://en.wikipedia.org/wiki/Hinge_loss>`_. This is the form that is
@@ -787,7 +782,7 @@ used, please refer to their respective p
       classification by pairwise coupling"
       <https://www.csie.ntu.edu.tw/~cjlin/papers/svmprob/svmprob.pdf>`_, JMLR
       5:975-1005, 2004.
- 
+
    .. [#3] Fan, Rong-En, et al.,
       `"LIBLINEAR: A library for large linear classification."
       <https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf>`_,
@@ -800,14 +795,14 @@ used, please refer to their respective p
       <https://www.microsoft.com/en-us/research/uploads/prod/2006/01/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf>`_,
       chapter 7 Sparse Kernel Machines
 
-   .. [#6] `"A Tutorial on Support Vector Regression"
-      <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.114.4288>`_,
+   .. [#6] :doi:`"A Tutorial on Support Vector Regression"
+      <10.1023/B:STCO.0000035301.49549.88>`
       Alex J. Smola, Bernhard Schölkopf - Statistics and Computing archive
       Volume 14 Issue 3, August 2004, p. 199-222.
 
    .. [#7] Schölkopf et. al `New Support Vector Algorithms
       <https://www.stat.purdue.edu/~yuzhu/stat598m3/Papers/NewSVM.pdf>`_
-    
+
    .. [#8] Crammer and Singer `On the Algorithmic Implementation ofMulticlass
       Kernel-based Vector Machines
       <http://jmlr.csail.mit.edu/papers/volume2/crammer01a/crammer01a.pdf>`_,
diff -pruN 0.23.2-5/doc/modules/tree.rst 1.1.1-1/doc/modules/tree.rst
--- 0.23.2-5/doc/modules/tree.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/modules/tree.rst	2022-05-19 12:16:26.456783000 +0000
@@ -10,7 +10,7 @@ Decision Trees
 for :ref:`classification <tree_classification>` and :ref:`regression
 <tree_regression>`. The goal is to create a model that predicts the value of a
 target variable by learning simple decision rules inferred from the data
-features.
+features. A tree can be seen as a piecewise constant approximation.
 
 For instance, in the example below, decision trees learn from data to
 approximate a sine curve with a set of if-then-else decision rules. The deeper
@@ -23,18 +23,19 @@ the tree, the more complex the decision
 
 Some advantages of decision trees are:
 
-    - Simple to understand and to interpret. Trees can be visualised.
+    - Simple to understand and to interpret. Trees can be visualized.
 
     - Requires little data preparation. Other techniques often require data
-      normalisation, dummy variables need to be created and blank values to
+      normalization, dummy variables need to be created and blank values to
       be removed. Note however that this module does not support missing
       values.
 
     - The cost of using the tree (i.e., predicting data) is logarithmic in the
       number of data points used to train the tree.
 
-    - Able to handle both numerical and categorical data. Other techniques
-      are usually specialised in analysing datasets that have only one type
+    - Able to handle both numerical and categorical data. However, the
+      scikit-learn implementation does not support categorical variables for now.
+      Other techniques are usually specialized in analyzing datasets that have only one type
       of variable. See :ref:`algorithms <tree_algorithms>` for more
       information.
 
@@ -55,7 +56,7 @@ Some advantages of decision trees are:
 The disadvantages of decision trees include:
 
     - Decision-tree learners can create over-complex trees that do not
-      generalise the data well. This is called overfitting. Mechanisms
+      generalize the data well. This is called overfitting. Mechanisms
       such as pruning, setting the minimum number of samples required
       at a leaf node or setting the maximum depth of the tree are
       necessary to avoid this problem.
@@ -65,6 +66,10 @@ The disadvantages of decision trees incl
       This problem is mitigated by using decision trees within an
       ensemble.
 
+    - Predictions of decision trees are neither smooth nor continuous, but
+      piecewise constant approximations as seen in the above figure. Therefore,
+      they are not good at extrapolation.
+
     - The problem of learning an optimal decision tree is known to be
       NP-complete under several aspects of optimality and even for simple
       concepts. Consequently, practical decision-tree learning algorithms
@@ -91,8 +96,8 @@ Classification
 classification on a dataset.
 
 As with other classifiers, :class:`DecisionTreeClassifier` takes as input two arrays:
-an array X, sparse or dense, of size ``[n_samples, n_features]``  holding the
-training samples, and an array Y of integer values, size ``[n_samples]``,
+an array X, sparse or dense, of shape ``(n_samples, n_features)`` holding the
+training samples, and an array Y of integer values, shape ``(n_samples,)``,
 holding the class labels for the training samples::
 
     >>> from sklearn import tree
@@ -106,8 +111,13 @@ After being fitted, the model can then b
     >>> clf.predict([[2., 2.]])
     array([1])
 
-Alternatively, the probability of each class can be predicted, which is the
-fraction of training samples of the same class in a leaf::
+In case multiple classes are tied for the highest probability, the
+classifier will predict the class with the lowest index amongst those
+classes.
+
+As an alternative to outputting a specific class, the probability of each class
+can be predicted, which is the fraction of training samples of the class in a
+leaf::
 
     >>> clf.predict_proba([[2., 2.]])
     array([[0., 1.]])
@@ -120,14 +130,16 @@ Using the Iris dataset, we can construct
 
     >>> from sklearn.datasets import load_iris
     >>> from sklearn import tree
-    >>> X, y = load_iris(return_X_y=True)
+    >>> iris = load_iris()
+    >>> X, y = iris.data, iris.target
     >>> clf = tree.DecisionTreeClassifier()
     >>> clf = clf.fit(X, y)
 
 Once trained, you can plot the tree with the :func:`plot_tree` function::
 
 
-    >>> tree.plot_tree(clf) # doctest: +SKIP
+    >>> tree.plot_tree(clf)
+    [...]
 
 .. figure:: ../auto_examples/tree/images/sphx_glr_plot_iris_dtc_002.png
    :target: ../auto_examples/tree/plot_iris_dtc.html
@@ -241,7 +253,7 @@ Multi-output problems
 =====================
 
 A multi-output problem is a supervised learning problem with several outputs
-to predict, that is when Y is a 2d array of size ``[n_samples, n_outputs]``.
+to predict, that is when Y is a 2d array of shape ``(n_samples, n_outputs)``.
 
 When there is no correlation between the outputs, a very simple way to solve
 this kind of problem is to build n independent models, i.e. one for each
@@ -262,7 +274,7 @@ multi-output problems. This requires the
 This module offers support for multi-output problems by implementing this
 strategy in both :class:`DecisionTreeClassifier` and
 :class:`DecisionTreeRegressor`. If a decision tree is fit on an output array Y
-of size ``[n_samples, n_outputs]`` then the resulting estimator will:
+of shape ``(n_samples, n_outputs)`` then the resulting estimator will:
 
   * Output n_output values upon ``predict``;
 
@@ -312,7 +324,8 @@ In general, the run time cost to constru
 to generate balanced trees, they will not always be balanced.  Assuming that the
 subtrees remain approximately balanced, the cost at each node consists of
 searching through :math:`O(n_{features})` to find the feature that offers the
-largest reduction in entropy.  This has a cost of
+largest reduction in the impurity criterion, e.g. log loss (which is equivalent to an
+information gain). This has a cost of
 :math:`O(n_{features}n_{samples}\log(n_{samples}))` at each node, leading to a
 total cost over the entire trees (by summing the cost at each node) of
 :math:`O(n_{features}n_{samples}^{2}\log(n_{samples}))`.
@@ -333,7 +346,7 @@ Tips on practical use
     in gaining more insights about how the decision tree makes predictions, which is
     important for understanding the important features in the data.
 
-  * Visualise your tree as you are training by using the ``export``
+  * Visualize your tree as you are training by using the ``export``
     function.  Use ``max_depth=3`` as an initial tree depth to get a feel for
     how the tree is fitting to your data, and then increase the depth.
 
@@ -353,6 +366,11 @@ Tips on practical use
     classification with few classes, ``min_samples_leaf=1`` is often the best
     choice.
 
+    Note that ``min_samples_split`` considers samples directly and independent of
+    ``sample_weight``, if provided (e.g. a node with m weighted samples is still
+    treated as having exactly m samples). Consider ``min_weight_fraction_leaf`` or
+    ``min_impurity_decrease`` if accounting for sample weights is required at splits.
+
   * Balance your dataset before training to prevent the tree from being biased
     toward the classes that are dominant. Class balancing can be done by
     sampling an equal number of samples from each class, or preferably by
@@ -377,7 +395,6 @@ Tips on practical use
     most of the samples.
 
 
-
 .. _tree_algorithms:
 
 Tree algorithms: ID3, C4.5, C5.0 and CART
@@ -391,7 +408,7 @@ The algorithm creates a multiway tree, f
 a greedy manner) the categorical feature that will yield the largest
 information gain for categorical targets. Trees are grown to their
 maximum size and then a pruning step is usually applied to improve the
-ability of the tree to generalise to unseen data.
+ability of the tree to generalize to unseen data.
 
 C4.5 is the successor to ID3 and removed the restriction that features
 must be categorical by dynamically defining a discrete attribute (based
@@ -411,7 +428,7 @@ it differs in that it supports numerical
 does not compute rule sets. CART constructs binary trees using the feature
 and threshold that yield the largest information gain at each node.
 
-scikit-learn uses an optimised version of the CART algorithm; however, scikit-learn
+scikit-learn uses an optimized version of the CART algorithm; however, scikit-learn
 implementation does not support categorical variables for now.
 
 .. _ID3: https://en.wikipedia.org/wiki/ID3_algorithm
@@ -424,99 +441,137 @@ Mathematical formulation
 ========================
 
 Given training vectors :math:`x_i \in R^n`, i=1,..., l and a label vector
-:math:`y \in R^l`, a decision tree recursively partitions the space such
-that the samples with the same labels are grouped together.
+:math:`y \in R^l`, a decision tree recursively partitions the feature space
+such that the samples with the same labels or similar target values are grouped
+together.
 
-Let the data at node :math:`m` be represented by :math:`Q`. For
-each candidate split :math:`\theta = (j, t_m)` consisting of a
+Let the data at node :math:`m` be represented by :math:`Q_m` with :math:`n_m`
+samples. For each candidate split :math:`\theta = (j, t_m)` consisting of a
 feature :math:`j` and threshold :math:`t_m`, partition the data into
-:math:`Q_{left}(\theta)` and :math:`Q_{right}(\theta)` subsets
+:math:`Q_m^{left}(\theta)` and :math:`Q_m^{right}(\theta)` subsets
 
 .. math::
 
-    Q_{left}(\theta) = {(x, y) | x_j <= t_m}
+    Q_m^{left}(\theta) = \{(x, y) | x_j <= t_m\}
 
-    Q_{right}(\theta) = Q \setminus Q_{left}(\theta)
+    Q_m^{right}(\theta) = Q_m \setminus Q_m^{left}(\theta)
 
-The impurity at :math:`m` is computed using an impurity function
-:math:`H()`, the choice of which depends on the task being solved
-(classification or regression)
+The quality of a candidate split of node :math:`m` is then computed using an
+impurity function or loss function :math:`H()`, the choice of which depends on
+the task being solved (classification or regression)
 
 .. math::
 
-   G(Q, \theta) = \frac{n_{left}}{N_m} H(Q_{left}(\theta))
-   + \frac{n_{right}}{N_m} H(Q_{right}(\theta))
+   G(Q_m, \theta) = \frac{n_m^{left}}{n_m} H(Q_m^{left}(\theta))
+   + \frac{n_m^{right}}{n_m} H(Q_m^{right}(\theta))
 
 Select the parameters that minimises the impurity
 
 .. math::
 
-    \theta^* = \operatorname{argmin}_\theta  G(Q, \theta)
+    \theta^* = \operatorname{argmin}_\theta  G(Q_m, \theta)
 
-Recurse for subsets :math:`Q_{left}(\theta^*)` and
-:math:`Q_{right}(\theta^*)` until the maximum allowable depth is reached,
-:math:`N_m < \min_{samples}` or :math:`N_m = 1`.
+Recurse for subsets :math:`Q_m^{left}(\theta^*)` and
+:math:`Q_m^{right}(\theta^*)` until the maximum allowable depth is reached,
+:math:`n_m < \min_{samples}` or :math:`n_m = 1`.
 
 Classification criteria
 -----------------------
 
 If a target is a classification outcome taking on values 0,1,...,K-1,
-for node :math:`m`, representing a region :math:`R_m` with :math:`N_m`
-observations, let
+for node :math:`m`, let
 
 .. math::
 
-    p_{mk} = 1/ N_m \sum_{x_i \in R_m} I(y_i = k)
+    p_{mk} = \frac{1}{n_m} \sum_{y \in Q_m} I(y = k)
 
-be the proportion of class k observations in node :math:`m`
+be the proportion of class k observations in node :math:`m`. If :math:`m` is a
+terminal node, `predict_proba` for this region is set to :math:`p_{mk}`.
+Common measures of impurity are the following.
 
-Common measures of impurity are Gini
+Gini:
 
 .. math::
 
-    H(X_m) = \sum_k p_{mk} (1 - p_{mk})
+    H(Q_m) = \sum_k p_{mk} (1 - p_{mk})
 
-Entropy
+Log Loss or Entropy:
 
 .. math::
 
-    H(X_m) = - \sum_k p_{mk} \log(p_{mk})
+    H(Q_m) = - \sum_k p_{mk} \log(p_{mk})
 
-and Misclassification
 
-.. math::
+.. note::
+
+  The entropy criterion computes the Shannon entropy of the possible classes. It
+  takes the class frequencies of the training data points that reached a given
+  leaf :math:`m` as their probability. Using the **Shannon entropy as tree node
+  splitting criterion is equivalent to minimizing the log loss** (also known as
+  cross-entropy and multinomial deviance) between the true labels :math:`y_i`
+  and the probabilistic predictions :math:`T_k(x_i)` of the tree model :math:`T` for class :math:`k`.
+
+  To see this, first recall that the log loss of a tree model :math:`T`
+  computed on a dataset :math:`D` is defined as follows:
+
+  .. math::
+  
+      \mathrm{LL}(D, T) = -\frac{1}{n} \sum_{(x_i, y_i) \in D} \sum_k I(y_i = k) \log(T_k(x_i))
 
-    H(X_m) = 1 - \max(p_{mk})
+  where :math:`D` is a training dataset of :math:`n` pairs :math:`(x_i, y_i)`.
 
-where :math:`X_m` is the training data in node :math:`m`
+  In a classification tree, the predicted class probabilities within leaf nodes
+  are constant, that is: for all :math:`(x_i, y_i) \in Q_m`, one has:
+  :math:`T_k(x_i) = p_{mk}` for each class :math:`k`.
+
+  This property makes it possible to rewrite :math:`\mathrm{LL}(D, T)` as the
+  sum of the Shannon entropies computed for each leaf of :math:`T` weighted by
+  the number of training data points that reached each leaf:
+
+  .. math::
+  
+      \mathrm{LL}(D, T) = \sum_{m \in T} \frac{n_m}{n} H(Q_m)
 
 Regression criteria
 -------------------
 
-If the target is a continuous value, then for node :math:`m`,
-representing a region :math:`R_m` with :math:`N_m` observations, common
-criteria to minimise as for determining locations for future
-splits are Mean Squared Error, which minimizes the L2 error
-using mean values at terminal nodes, and Mean Absolute Error, which
-minimizes the L1 error using median values at terminal nodes.
+If the target is a continuous value, then for node :math:`m`, common
+criteria to minimize as for determining locations for future splits are Mean
+Squared Error (MSE or L2 error), Poisson deviance as well as Mean Absolute
+Error (MAE or L1 error). MSE and Poisson deviance both set the predicted value
+of terminal nodes to the learned mean value :math:`\bar{y}_m` of the node
+whereas the MAE sets the predicted value of terminal nodes to the median
+:math:`median(y)_m`.
 
 Mean Squared Error:
 
 .. math::
 
-    \bar{y}_m = \frac{1}{N_m} \sum_{i \in N_m} y_i
+    \bar{y}_m = \frac{1}{n_m} \sum_{y \in Q_m} y
+
+    H(Q_m) = \frac{1}{n_m} \sum_{y \in Q_m} (y - \bar{y}_m)^2
+
+Half Poisson deviance:
+
+.. math::
+
+    H(Q_m) = \frac{1}{n_m} \sum_{y \in Q_m} (y \log\frac{y}{\bar{y}_m}
+    - y + \bar{y}_m)
 
-    H(X_m) = \frac{1}{N_m} \sum_{i \in N_m} (y_i - \bar{y}_m)^2
+Setting `criterion="poisson"` might be a good choice if your target is a count
+or a frequency (count per some unit). In any case, :math:`y >= 0` is a
+necessary condition to use this criterion. Note that it fits much slower than
+the MSE criterion.
 
 Mean Absolute Error:
 
 .. math::
 
-    median(y)_m = \underset{i \in N_m}{\mathrm{median}}(y_i)
+    median(y)_m = \underset{y \in Q_m}{\mathrm{median}}(y)
 
-    H(X_m) = \frac{1}{N_m} \sum_{i \in N_m} |y_i - median(y)_m|
+    H(Q_m) = \frac{1}{n_m} \sum_{y \in Q_m} |y - median(y)_m|
 
-where :math:`X_m` is the training data in node :math:`m`
+Note that it fits much slower than the MSE criterion.
 
 
 .. _minimal_cost_complexity_pruning:
@@ -532,9 +587,9 @@ a given tree :math:`T`:
 
 .. math::
 
-  R_\alpha(T) = R(T) + \alpha|T|
+  R_\alpha(T) = R(T) + \alpha|\widetilde{T}|
 
-where :math:`|T|` is the number of terminal nodes in :math:`T` and :math:`R(T)`
+where :math:`|\widetilde{T}|` is the number of terminal nodes in :math:`T` and :math:`R(T)`
 is traditionally defined as the total misclassification rate of the terminal
 nodes. Alternatively, scikit-learn uses the total sample weighted impurity of
 the terminal nodes for :math:`R(T)`. As shown above, the impurity of a node
diff -pruN 0.23.2-5/doc/README.md 1.1.1-1/doc/README.md
--- 0.23.2-5/doc/README.md	2020-08-04 12:12:58.856675400 +0000
+++ 1.1.1-1/doc/README.md	2022-05-19 12:16:26.432781500 +0000
@@ -1,6 +1,6 @@
 # Documentation for scikit-learn
 
-This directory contains the full manual and web site as displayed at
+This directory contains the full manual and website as displayed at
 http://scikit-learn.org. See
 http://scikit-learn.org/dev/developers/contributing.html#documentation for
 detailed information about the documentation. 
diff -pruN 0.23.2-5/doc/related_projects.rst 1.1.1-1/doc/related_projects.rst
--- 0.23.2-5/doc/related_projects.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/related_projects.rst	2022-05-19 12:16:26.456783000 +0000
@@ -7,7 +7,7 @@ Related Projects
 Projects implementing the scikit-learn estimator API are encouraged to use
 the `scikit-learn-contrib template <https://github.com/scikit-learn-contrib/project-template>`_
 which facilitates best practices for testing and documenting estimators.
-The `scikit-learn-contrib GitHub organisation <https://github.com/scikit-learn-contrib/scikit-learn-contrib>`_
+The `scikit-learn-contrib GitHub organization <https://github.com/scikit-learn-contrib/scikit-learn-contrib>`_
 also accepts high-quality contributions of repositories conforming to this
 template.
 
@@ -36,14 +36,44 @@ enhance the functionality of scikit-lear
   An automated machine learning toolkit and a drop-in replacement for a
   scikit-learn estimator
 
+- `autoviml <https://github.com/AutoViML/Auto_ViML/>`_
+  Automatically Build Multiple Machine Learning Models with a Single Line of Code.
+  Designed as a faster way to use scikit-learn models without having to preprocess data.
+
 - `TPOT <https://github.com/rhiever/tpot>`_
   An automated machine learning toolkit that optimizes a series of scikit-learn
   operators to design a machine learning pipeline, including data and feature
   preprocessors as well as the estimators. Works as a drop-in replacement for a
   scikit-learn estimator.
 
+- `Featuretools <https://github.com/alteryx/featuretools>`_
+  A framework to perform automated feature engineering. It can be used for
+  transforming temporal and relational datasets into feature matrices for
+  machine learning.
+
+- `Neuraxle <https://github.com/Neuraxio/Neuraxle>`_
+  A library for building neat pipelines, providing the right abstractions to
+  both ease research, development, and deployment of machine learning
+  applications. Compatible with deep learning frameworks and scikit-learn API,
+  it can stream minibatches, use data checkpoints, build funky pipelines, and
+  serialize models with custom per-step savers.
+
+- `EvalML <https://github.com/alteryx/evalml>`_
+  EvalML is an AutoML library which builds, optimizes, and evaluates
+  machine learning pipelines using domain-specific objective functions.
+  It incorporates multiple modeling libraries under one API, and
+  the objects that EvalML creates use an sklearn-compatible API.
+
 **Experimentation frameworks**
 
+- `Neptune <https://neptune.ai/>`_ Metadata store for MLOps,
+  built for teams that run a lot of experiments. It gives you a single
+  place to log, store, display, organize, compare, and query all your
+  model building metadata.
+
+- `Sacred <https://github.com/IDSIA/Sacred>`_ Tool to help you configure,
+  organize, log and reproduce experiments
+
 - `REP <https://github.com/yandex/REP>`_ Environment for conducting data-driven
   research in a consistent and reproducible way
 
@@ -52,7 +82,7 @@ enhance the functionality of scikit-lear
   wrapper around scikit-learn that makes it easy to run machine learning
   experiments with multiple learners and large feature sets.
 
-**Model inspection and visualisation**
+**Model inspection and visualization**
 
 - `dtreeviz <https://github.com/parrt/dtreeviz/>`_ A python library for
   decision tree visualization and model interpretation.
@@ -77,11 +107,11 @@ enhance the functionality of scikit-lear
   cross-validated parameter search using any of these strategies.
 
 - `sklearn-deap <https://github.com/rsteca/sklearn-deap>`_ Use evolutionary
-   algorithms instead of gridsearch in scikit-learn.
+  algorithms instead of gridsearch in scikit-learn.
 
 **Model export for production**
 
-- `onnxmltools <https://github.com/onnx/onnxmltools>`_ Serializes many
+- `sklearn-onnx <https://github.com/onnx/sklearn-onnx>`_ Serialization of many
   Scikit-learn pipelines to `ONNX <https://onnx.ai/>`_ for interchange and
   prediction.
 
@@ -93,6 +123,11 @@ enhance the functionality of scikit-lear
 - `sklearn-porter <https://github.com/nok/sklearn-porter>`_
   Transpile trained scikit-learn models to C, Java, Javascript and others.
 
+- `m2cgen <https://github.com/BayesWitnesses/m2cgen>`_
+  A lightweight library which allows to transpile trained machine learning
+  models including many scikit-learn estimators into a native code of C, Java,
+  Go, R, PHP, Dart, Haskell, Rust and many other programming languages.
+
 - `treelite <https://treelite.readthedocs.io>`_
   Compiles tree-based ensemble models into C code for minimizing prediction
   latency.
@@ -108,7 +143,7 @@ and tasks.
 
 **Structured learning**
 
-- `tslearn <https://github.com/tslearn-team/tslearn>`_ A machine learning library for time series 
+- `tslearn <https://github.com/tslearn-team/tslearn>`_ A machine learning library for time series
   that offers tools for pre-processing and feature extraction as well as dedicated models for clustering, classification and regression.
 
 - `sktime <https://github.com/alan-turing-institute/sktime>`_ A scikit-learn compatible toolbox for machine learning with time series including time series classification/regression and (supervised/panel) forecasting.
@@ -132,8 +167,8 @@ and tasks.
 - `nolearn <https://github.com/dnouri/nolearn>`_ A number of wrappers and
   abstractions around existing neural network libraries
 
-- `keras <https://github.com/fchollet/keras>`_ Deep Learning library capable of
-  running on top of either TensorFlow or Theano.
+- `Keras <https://www.tensorflow.org/api_docs/python/tf/keras>`_ High-level API for
+  TensorFlow with a scikit-learn inspired API.
 
 - `lasagne <https://github.com/Lasagne/Lasagne>`_ A lightweight library to
   build and train neural networks in Theano.
@@ -141,11 +176,23 @@ and tasks.
 - `skorch <https://github.com/dnouri/skorch>`_ A scikit-learn compatible
   neural network library that wraps PyTorch.
 
+- `scikeras <https://github.com/adriangb/scikeras>`_ provides a wrapper around
+  Keras to interface it with scikit-learn. SciKeras is the successor
+  of `tf.keras.wrappers.scikit_learn`.
+
+**Federated Learning**
+
+- `Flower <https://flower.dev/>`_ A friendly federated learning framework with a
+  unified approach that can federate any workload, any ML framework, and any programming language.
+
 **Broad scope**
 
 - `mlxtend <https://github.com/rasbt/mlxtend>`_ Includes a number of additional
   estimators as well as model visualization utilities.
 
+- `scikit-lego <https://github.com/koaning/scikit-lego>`_ A number of scikit-learn compatible
+  custom transformers, models and metrics, focusing on solving practical industry tasks.
+
 **Other regression and classification**
 
 - `xgboost <https://github.com/dmlc/xgboost>`_ Optimised gradient boosted decision
@@ -185,7 +232,7 @@ and tasks.
   Dirichlet allocation in Cython which uses `Gibbs sampling
   <https://en.wikipedia.org/wiki/Gibbs_sampling>`_ to sample from the true
   posterior distribution. (scikit-learn's
-  :class:`sklearn.decomposition.LatentDirichletAllocation` implementation uses
+  :class:`~sklearn.decomposition.LatentDirichletAllocation` implementation uses
   `variational inference
   <https://en.wikipedia.org/wiki/Variational_Bayesian_methods>`_ to sample from
   a tractable approximation of a topic model's posterior distribution.)
@@ -210,6 +257,23 @@ and tasks.
   <https://github.com/scikit-learn-contrib/imbalanced-learn>`_ Various
   methods to under- and over-sample datasets.
 
+- `Feature-engine <https://github.com/solegalli/feature_engine>`_ A library
+  of sklearn compatible transformers for missing data imputation, categorical
+  encoding, variable transformation, discretization, outlier handling and more.
+  Feature-engine allows the application of preprocessing steps to selected groups
+  of variables and it is fully compatible with the Scikit-learn Pipeline.
+
+**Topological Data Analysis**
+
+- `giotto-tda <https://github.com/giotto-ai/giotto-tda>`_ A library for
+  `Topological Data Analysis
+  <https://en.wikipedia.org/wiki/Topological_data_analysis>`_ aiming to
+  provide a scikit-learn compatible API. It offers tools to transform data
+  inputs (point clouds, graphs, time series, images) into forms suitable for
+  computations of topological summaries, and components dedicated to
+  extracting sets of scalar features of topological origin, which can be used
+  alongside other feature extraction methods in scikit-learn.
+
 Statistical learning with Python
 --------------------------------
 Other packages useful for data analysis and machine learning.
@@ -224,12 +288,13 @@ Other packages useful for data analysis
 - `PyMC <https://pymc-devs.github.io/pymc/>`_ Bayesian statistical models and
   fitting algorithms.
 
-- `Sacred <https://github.com/IDSIA/Sacred>`_ Tool to help you configure,
-  organize, log and reproduce experiments
-
 - `Seaborn <https://stanford.edu/~mwaskom/software/seaborn/>`_ Visualization library based on
   matplotlib. It provides a high-level interface for drawing attractive statistical graphics.
 
+- `scikit-survival <https://scikit-survival.readthedocs.io/>`_ A library implementing
+  models to learn from censored time-to-event data (also called survival analysis).
+  Models are fully compatible with scikit-learn.
+
 Recommendation Engine packages
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -266,3 +331,33 @@ Domain specific packages
 
 - `MSMBuilder <http://msmbuilder.org/>`_  Machine learning for protein
   conformational dynamics time series.
+
+Translations of scikit-learn documentation
+------------------------------------------
+
+The purpose of translations is to ease reading and understanding in languages
+other than English. Its aim is to help people who do not understand English
+or have doubts about its interpretation. Additionally, some people prefer
+to read documentation in their native language, but please bear in mind that
+the only official documentation is the English one [#f1]_.
+
+Those translation efforts are community initiatives and we have no control
+over them.
+If you want to contribute or report an issue with the translation, please
+contact the authors of the translation.
+Some available translations are linked here to improve their dissemination
+and promote community efforts.
+
+- `Chinese translation <https://sklearn.apachecn.org/>`_
+  (`source <https://github.com/apachecn/sklearn-doc-zh>`__)
+- `Persian translation <https://sklearn.ir/>`_
+  (`source <https://github.com/mehrdad-dev/scikit-learn>`__)
+- `Spanish translation <https://qu4nt.github.io/sklearn-doc-es/>`_
+  (`source <https://github.com/qu4nt/sklearn-doc-es>`__)
+
+
+.. rubric:: Footnotes
+
+.. [#f1] following the `Linux documentation Disclaimer
+   <https://www.kernel.org/doc/html/latest/translations/index.html#disclaimer>`__
+
diff -pruN 0.23.2-5/doc/roadmap.rst 1.1.1-1/doc/roadmap.rst
--- 0.23.2-5/doc/roadmap.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/roadmap.rst	2022-05-19 12:16:26.456783000 +0000
@@ -51,7 +51,7 @@ external to the core library.
   (i.e. rectangular data largely invariant to column and row order;
   predicting targets with simple structure)
 * improve the ease for users to develop and publish external components
-* improve inter-operability with modern data science tools (e.g. Pandas, Dask)
+* improve interoperability with modern data science tools (e.g. Pandas, Dask)
   and infrastructures (e.g. distributed processing)
 
 Many of the more fine-grained goals can be found under the `API tag
@@ -70,16 +70,16 @@ the document up to date as we work on th
 
    * document current handling
    * column reordering issue :issue:`7242`
-   * avoiding unnecessary conversion to ndarray :issue:`12147`
+   * avoiding unnecessary conversion to ndarray |ss| :issue:`12147` |se|
    * returning DataFrames from transformers :issue:`5523`
-   * getting DataFrames from dataset loaders :issue:`10733`,
+   * getting DataFrames from dataset loaders |ss| :issue:`10733` |se|,
      |ss| :issue:`13902` |se|
-   * Sparse currently not considered :issue:`12800`
+   * Sparse currently not considered |ss| :issue:`12800` |se|
 
 #. Improved handling of categorical features
 
    * Tree-based models should be able to handle both continuous and categorical
-     features :issue:`12866` and :issue:`15550`.
+     features :issue:`12866` and |ss| :issue:`15550` |se|.
    * |ss| In dataset loaders :issue:`13902` |se|
    * As generic transformers to be used with ColumnTransforms (e.g. ordinal
      encoding supervised by correlation with target variable) :issue:`5853`,
@@ -89,7 +89,7 @@ the document up to date as we work on th
 #. Improved handling of missing data
 
    * Making sure meta-estimators are lenient towards missing data,
-     :issue:`15319`
+     |ss| :issue:`15319` |se|
    * Non-trivial imputers |ss| :issue:`11977`, :issue:`12852` |se|
    * Learners directly handling missing data |ss| :issue:`13911` |se|
    * An amputation sample generator to make parts of a dataset go missing
@@ -125,19 +125,20 @@ the document up to date as we work on th
    components
 
    * More flexible estimator checks that do not select by estimator name
-     :issue:`6599` :issue:`6715`
-   * Example of how to develop an estimator or a meta-estimator, :issue:`14582`
+     |ss| :issue:`6599` |se| :issue:`6715`
+   * Example of how to develop an estimator or a meta-estimator,
+     |ss| :issue:`14582` |se|
    * More self-sufficient running of scikit-learn-contrib or a similar resource
 
 #. Support resampling and sample reduction
 
    * Allow subsampling of majority classes (in a pipeline?) :issue:`3855`
-   * Implement random forests with resampling :issue:`8732`
+   * Implement random forests with resampling :issue:`13227`
 
 #. Better interfaces for interactive development
 
-   * |ss| __repr__ |se| and HTML visualisations of estimators
-     |ss| :issue:`6323` |se| and :pr:`14180`.
+   * |ss| __repr__ and HTML visualisations of estimators
+     :issue:`6323` and :pr:`14180` |se|.
    * Include plotting tools, not just as examples. :issue:`9173`
 
 #. Improved tools for model diagnostics and basic inference
@@ -223,7 +224,7 @@ the document up to date as we work on th
    (to be discussed);
 
    * Extend documentation to mention how to deploy models in Python-free
-     environments for instance  `ONNX <https://github.com/onnx/onnxmltools>`_.
+     environments for instance `ONNX <https://github.com/onnx/sklearn-onnx>`_.
      and use the above best practices to assess predictive consistency between
      scikit-learn and ONNX prediction functions on validation set.
    * Document good practices to detect temporal distribution drift for deployed
@@ -249,7 +250,7 @@ Subpackage-specific goals
 * perhaps we want to be able to get back more than multiple metrics
 * the handling of random states in CV splitters is a poor design and
   contradicts the validation of similar parameters in estimators,
-  :issue:`15177`
+  `SLEP011 <https://github.com/scikit-learn/enhancement_proposals/pull/24>`_
 * exploit warm-starting and path algorithms so the benefits of `EstimatorCV`
   objects can be accessed via `GridSearchCV` and used in Pipelines.
   :issue:`1626`
diff -pruN 0.23.2-5/doc/sphinxext/add_toctree_functions.py 1.1.1-1/doc/sphinxext/add_toctree_functions.py
--- 0.23.2-5/doc/sphinxext/add_toctree_functions.py	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/sphinxext/add_toctree_functions.py	2022-05-19 12:16:26.456783000 +0000
@@ -0,0 +1,160 @@
+"""Inspired by https://github.com/pandas-dev/pydata-sphinx-theme
+
+BSD 3-Clause License
+
+Copyright (c) 2018, pandas
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+import docutils
+
+
+def add_toctree_functions(app, pagename, templatename, context, doctree):
+    """Add functions so Jinja templates can add toctree objects.
+
+    This converts the docutils nodes into a nested dictionary that Jinja can
+    use in our templating.
+    """
+    from sphinx.environment.adapters.toctree import TocTree
+
+    def get_nav_object(maxdepth=None, collapse=True, numbered=False, **kwargs):
+        """Return a list of nav links that can be accessed from Jinja.
+
+        Parameters
+        ----------
+        maxdepth: int
+            How many layers of TocTree will be returned
+        collapse: bool
+            Whether to only include sub-pages of the currently-active page,
+            instead of sub-pages of all top-level pages of the site.
+        numbered: bool
+            Whether to add section number to title
+        kwargs: key/val pairs
+            Passed to the `TocTree.get_toctree_for` Sphinx method
+        """
+        # The TocTree will contain the full site TocTree including sub-pages.
+        # "collapse=True" collapses sub-pages of non-active TOC pages.
+        # maxdepth controls how many TOC levels are returned
+        toctree = TocTree(app.env).get_toctree_for(
+            pagename, app.builder, collapse=collapse, maxdepth=maxdepth, **kwargs
+        )
+        # If no toctree is defined (AKA a single-page site), skip this
+        if toctree is None:
+            return []
+
+        # toctree has this structure
+        #   <caption>
+        #   <bullet_list>
+        #       <list_item classes="toctree-l1">
+        #       <list_item classes="toctree-l1">
+        # `list_item`s are the actual TOC links and are the only thing we want
+        toc_items = [
+            item
+            for child in toctree.children
+            for item in child
+            if isinstance(item, docutils.nodes.list_item)
+        ]
+
+        # Now convert our docutils nodes into dicts that Jinja can use
+        nav = [
+            docutils_node_to_jinja(child, only_pages=True, numbered=numbered)
+            for child in toc_items
+        ]
+
+        return nav
+
+    context["get_nav_object"] = get_nav_object
+
+
+def docutils_node_to_jinja(list_item, only_pages=False, numbered=False):
+    """Convert a docutils node to a structure that can be read by Jinja.
+
+    Parameters
+    ----------
+    list_item : docutils list_item node
+        A parent item, potentially with children, corresponding to the level
+        of a TocTree.
+    only_pages : bool
+        Only include items for full pages in the output dictionary. Exclude
+        anchor links (TOC items with a URL that starts with #)
+    numbered: bool
+        Whether to add section number to title
+
+    Returns
+    -------
+    nav : dict
+        The TocTree, converted into a dictionary with key/values that work
+        within Jinja.
+    """
+    if not list_item.children:
+        return None
+
+    # We assume this structure of a list item:
+    # <list_item>
+    #     <compact_paragraph >
+    #         <reference> <-- the thing we want
+    reference = list_item.children[0].children[0]
+    title = reference.astext()
+    url = reference.attributes["refuri"]
+    active = "current" in list_item.attributes["classes"]
+
+    secnumber = reference.attributes.get("secnumber", None)
+    if numbered and secnumber is not None:
+        secnumber = ".".join(str(n) for n in secnumber)
+        title = f"{secnumber}. {title}"
+
+    # If we've got an anchor link, skip it if we wish
+    if only_pages and "#" in url:
+        return None
+
+    # Converting the docutils attributes into jinja-friendly objects
+    nav = {}
+    nav["title"] = title
+    nav["url"] = url
+    nav["active"] = active
+
+    # Recursively convert children as well
+    # If there are sub-pages for this list_item, there should be two children:
+    # a paragraph, and a bullet_list.
+    nav["children"] = []
+    if len(list_item.children) > 1:
+        # The `.children` of the bullet_list has the nodes of the sub-pages.
+        subpage_list = list_item.children[1].children
+        for sub_page in subpage_list:
+            child_nav = docutils_node_to_jinja(
+                sub_page, only_pages=only_pages, numbered=numbered
+            )
+            if child_nav is not None:
+                nav["children"].append(child_nav)
+    return nav
+
+
+def setup(app):
+    app.connect("html-page-context", add_toctree_functions)
+
+    return {"parallel_read_safe": True, "parallel_write_safe": True}
diff -pruN 0.23.2-5/doc/sphinxext/allow_nan_estimators.py 1.1.1-1/doc/sphinxext/allow_nan_estimators.py
--- 0.23.2-5/doc/sphinxext/allow_nan_estimators.py	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/sphinxext/allow_nan_estimators.py	2022-05-19 12:16:26.456783000 +0000
@@ -0,0 +1,59 @@
+from sklearn.utils import all_estimators
+from sklearn.compose import ColumnTransformer
+from sklearn.pipeline import FeatureUnion
+from sklearn.decomposition import SparseCoder
+from sklearn.utils.estimator_checks import _construct_instance
+from sklearn.utils._testing import SkipTest
+from docutils import nodes
+import warnings
+from contextlib import suppress
+
+from docutils.parsers.rst import Directive
+
+
+class AllowNanEstimators(Directive):
+    @staticmethod
+    def make_paragraph_for_estimator_type(estimator_type):
+        intro = nodes.list_item()
+        intro += nodes.strong(text="Estimators that allow NaN values for type ")
+        intro += nodes.literal(text=f"{estimator_type}")
+        intro += nodes.strong(text=":\n")
+        exists = False
+        lst = nodes.bullet_list()
+        for name, est_class in all_estimators(type_filter=estimator_type):
+            with suppress(SkipTest):
+                est = _construct_instance(est_class)
+
+            if est._get_tags().get("allow_nan"):
+                module_name = ".".join(est_class.__module__.split(".")[:2])
+                class_title = f"{est_class.__name__}"
+                class_url = f"generated/{module_name}.{class_title}.html"
+                item = nodes.list_item()
+                para = nodes.paragraph()
+                para += nodes.reference(
+                    class_title, text=class_title, internal=False, refuri=class_url
+                )
+                exists = True
+                item += para
+                lst += item
+        intro += lst
+        return [intro] if exists else None
+
+    def run(self):
+        lst = nodes.bullet_list()
+        for i in ["cluster", "regressor", "classifier", "transformer"]:
+            item = self.make_paragraph_for_estimator_type(i)
+            if item is not None:
+                lst += item
+        return [lst]
+
+
+def setup(app):
+
+    app.add_directive("allow_nan_estimators", AllowNanEstimators)
+
+    return {
+        "version": "0.1",
+        "parallel_read_safe": True,
+        "parallel_write_safe": True,
+    }
diff -pruN 0.23.2-5/doc/sphinxext/custom_references_resolver.py 1.1.1-1/doc/sphinxext/custom_references_resolver.py
--- 0.23.2-5/doc/sphinxext/custom_references_resolver.py	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/sphinxext/custom_references_resolver.py	2022-05-19 12:16:26.456783000 +0000
@@ -42,26 +42,29 @@ from sphinx.transforms.post_transforms i
 class CustomReferencesResolver(ReferencesResolver):
     def resolve_anyref(self, refdoc, node, contnode):
         """Resolve reference generated by the "any" role."""
-        stddomain = self.env.get_domain('std')
-        target = node['reftarget']
+        stddomain = self.env.get_domain("std")
+        target = node["reftarget"]
 
         # process 'py' domain first for python classes
         if "py:class" in node:
             with suppress(KeyError):
-                py_domain = self.env.domains['py']
+                py_domain = self.env.domains["py"]
                 py_ref = py_domain.resolve_any_xref(
-                    self.env, refdoc, self.app.builder, target, node, contnode)
+                    self.env, refdoc, self.app.builder, target, node, contnode
+                )
                 if py_ref:
                     return self.create_node(py_ref[0])
 
         # resolve :term:
-        term_ref = stddomain.resolve_xref(self.env, refdoc, self.app.builder,
-                                          'term', target, node, contnode)
+        term_ref = stddomain.resolve_xref(
+            self.env, refdoc, self.app.builder, "term", target, node, contnode
+        )
         if term_ref:
             # replace literal nodes with inline nodes
             if not isinstance(term_ref[0], nodes.inline):
-                inline_node = nodes.inline(rawsource=term_ref[0].rawsource,
-                                           classes=term_ref[0].get('classes'))
+                inline_node = nodes.inline(
+                    rawsource=term_ref[0].rawsource, classes=term_ref[0].get("classes")
+                )
                 if term_ref[0]:
                     inline_node.append(term_ref[0][0])
                 term_ref[0] = inline_node
@@ -69,46 +72,52 @@ class CustomReferencesResolver(Reference
 
         # next, do the standard domain
         std_ref = stddomain.resolve_any_xref(
-            self.env, refdoc, self.app.builder, target, node, contnode)
+            self.env, refdoc, self.app.builder, target, node, contnode
+        )
         if std_ref:
             return self.create_node(std_ref[0])
 
         for domain in self.env.domains.values():
             try:
                 ref = domain.resolve_any_xref(
-                    self.env, refdoc, self.app.builder, target, node, contnode)
+                    self.env, refdoc, self.app.builder, target, node, contnode
+                )
                 if ref:
                     return self.create_node(ref[0])
             except NotImplementedError:
                 # the domain doesn't yet support the new interface
                 # we have to manually collect possible references (SLOW)
                 for role in domain.roles:
-                    res = domain.resolve_xref(self.env, refdoc,
-                                              self.app.builder, role, target,
-                                              node, contnode)
+                    res = domain.resolve_xref(
+                        self.env, refdoc, self.app.builder, role, target, node, contnode
+                    )
                     if res and isinstance(res[0], nodes.Element):
-                        result = ('%s:%s' % (domain.name, role), res)
+                        result = ("%s:%s" % (domain.name, role), res)
                         return self.create_node(result)
 
         # no results considered to be <code>
-        contnode['classes'] = []
+        contnode["classes"] = []
         return contnode
 
     def create_node(self, result):
         res_role, newnode = result
         # Override "any" class with the actual role type to get the styling
         # approximately correct.
-        res_domain = res_role.split(':')[0]
-        if (len(newnode) > 0 and isinstance(newnode[0], nodes.Element)
-                and newnode[0].get('classes')):
-            newnode[0]['classes'].append(res_domain)
-            newnode[0]['classes'].append(res_role.replace(':', '-'))
+        res_domain = res_role.split(":")[0]
+        if (
+            len(newnode) > 0
+            and isinstance(newnode[0], nodes.Element)
+            and newnode[0].get("classes")
+        ):
+            newnode[0]["classes"].append(res_domain)
+            newnode[0]["classes"].append(res_role.replace(":", "-"))
         return newnode
 
 
 def setup(app):
-    if (hasattr(app.registry, "get_post_transforms")
-            and callable(app.registry.get_post_transforms)):
+    if hasattr(app.registry, "get_post_transforms") and callable(
+        app.registry.get_post_transforms
+    ):
         post_transforms = app.registry.get_post_transforms()
     else:
         # Support sphinx 1.6.*
diff -pruN 0.23.2-5/doc/sphinxext/doi_role.py 1.1.1-1/doc/sphinxext/doi_role.py
--- 0.23.2-5/doc/sphinxext/doi_role.py	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/sphinxext/doi_role.py	2022-05-19 12:16:26.456783000 +0000
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+"""
+    doilinks
+    ~~~~~~~~
+    Extension to add links to DOIs. With this extension you can use e.g.
+    :doi:`10.1016/S0022-2836(05)80360-2` in your documents. This will
+    create a link to a DOI resolver
+    (``https://doi.org/10.1016/S0022-2836(05)80360-2``).
+    The link caption will be the raw DOI.
+    You can also give an explicit caption, e.g.
+    :doi:`Basic local alignment search tool <10.1016/S0022-2836(05)80360-2>`.
+
+    :copyright: Copyright 2015  Jon Lund Steffensen. Based on extlinks by
+        the Sphinx team.
+    :license: BSD.
+"""
+
+from docutils import nodes, utils
+
+from sphinx.util.nodes import split_explicit_title
+
+
+def reference_role(typ, rawtext, text, lineno, inliner, options={}, content=[]):
+    text = utils.unescape(text)
+    has_explicit_title, title, part = split_explicit_title(text)
+    if typ in ["arXiv", "arxiv"]:
+        full_url = "https://arxiv.org/abs/" + part
+        if not has_explicit_title:
+            title = "arXiv:" + part
+        pnode = nodes.reference(title, title, internal=False, refuri=full_url)
+        return [pnode], []
+    if typ in ["doi", "DOI"]:
+        full_url = "https://doi.org/" + part
+        if not has_explicit_title:
+            title = "DOI:" + part
+        pnode = nodes.reference(title, title, internal=False, refuri=full_url)
+        return [pnode], []
+
+
+def setup_link_role(app):
+    app.add_role("arxiv", reference_role, override=True)
+    app.add_role("arXiv", reference_role, override=True)
+    app.add_role("doi", reference_role, override=True)
+    app.add_role("DOI", reference_role, override=True)
+
+
+def setup(app):
+    app.connect("builder-inited", setup_link_role)
+    return {"version": "0.1", "parallel_read_safe": True}
diff -pruN 0.23.2-5/doc/sphinxext/github_link.py 1.1.1-1/doc/sphinxext/github_link.py
--- 0.23.2-5/doc/sphinxext/github_link.py	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/sphinxext/github_link.py	2022-05-19 12:16:26.456783000 +0000
@@ -5,16 +5,16 @@ import os
 import sys
 from functools import partial
 
-REVISION_CMD = 'git rev-parse --short HEAD'
+REVISION_CMD = "git rev-parse --short HEAD"
 
 
 def _get_git_revision():
     try:
         revision = subprocess.check_output(REVISION_CMD.split()).strip()
     except (subprocess.CalledProcessError, OSError):
-        print('Failed to execute git to get revision')
+        print("Failed to execute git to get revision")
         return None
-    return revision.decode('utf-8')
+    return revision.decode("utf-8")
 
 
 def _linkcode_resolve(domain, info, package, url_fmt, revision):
@@ -34,14 +34,14 @@ def _linkcode_resolve(domain, info, pack
 
     if revision is None:
         return
-    if domain not in ('py', 'pyx'):
+    if domain not in ("py", "pyx"):
         return
-    if not info.get('module') or not info.get('fullname'):
+    if not info.get("module") or not info.get("fullname"):
         return
 
-    class_name = info['fullname'].split('.')[0]
-    module = __import__(info['module'], fromlist=[class_name])
-    obj = attrgetter(info['fullname'])(module)
+    class_name = info["fullname"].split(".")[0]
+    module = __import__(info["module"], fromlist=[class_name])
+    obj = attrgetter(info["fullname"])(module)
 
     # Unwrap the object to get the correct source
     # file in case that is wrapped by a decorator
@@ -59,14 +59,12 @@ def _linkcode_resolve(domain, info, pack
     if not fn:
         return
 
-    fn = os.path.relpath(fn,
-                         start=os.path.dirname(__import__(package).__file__))
+    fn = os.path.relpath(fn, start=os.path.dirname(__import__(package).__file__))
     try:
         lineno = inspect.getsourcelines(obj)[1]
     except Exception:
-        lineno = ''
-    return url_fmt.format(revision=revision, package=package,
-                          path=fn, lineno=lineno)
+        lineno = ""
+    return url_fmt.format(revision=revision, package=package, path=fn, lineno=lineno)
 
 
 def make_linkcode_resolve(package, url_fmt):
@@ -81,5 +79,6 @@ def make_linkcode_resolve(package, url_f
                                    '{path}#L{lineno}')
     """
     revision = _get_git_revision()
-    return partial(_linkcode_resolve, revision=revision, package=package,
-                   url_fmt=url_fmt)
+    return partial(
+        _linkcode_resolve, revision=revision, package=package, url_fmt=url_fmt
+    )
diff -pruN 0.23.2-5/doc/sphinxext/sphinx_issues.py 1.1.1-1/doc/sphinxext/sphinx_issues.py
--- 0.23.2-5/doc/sphinxext/sphinx_issues.py	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/sphinxext/sphinx_issues.py	2022-05-19 12:16:26.456783000 +0000
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """A Sphinx extension for linking to your project's issue tracker.
 
 Copyright 2014 Steven Loria
@@ -120,8 +119,9 @@ class IssueRole(object):
                 )
             else:
                 raise ValueError(
-                    "Neither {} nor issues_github_path "
-                    "is set".format(self.uri_config_option)
+                    "Neither {} nor issues_github_path is set".format(
+                        self.uri_config_option
+                    )
                 )
             issue_text = self.format_text(issue_no)
             link = nodes.reference(text=issue_text, refuri=ref, **options)
diff -pruN 0.23.2-5/doc/supervised_learning.rst 1.1.1-1/doc/supervised_learning.rst
--- 0.23.2-5/doc/supervised_learning.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/supervised_learning.rst	2022-05-19 12:16:26.456783000 +0000
@@ -1,9 +1,13 @@
+.. Places parent toc into the sidebar
+
+:parenttoc: True
+
 .. include:: includes/big_toc_css.rst
 
 .. _supervised-learning:
 
 Supervised learning
------------------------
+-------------------
 
 .. toctree::
     :maxdepth: 2
@@ -21,7 +25,7 @@ Supervised learning
     modules/ensemble
     modules/multiclass
     modules/feature_selection.rst
-    modules/label_propagation.rst
+    modules/semi_supervised.rst
     modules/isotonic.rst
     modules/calibration.rst
     modules/neural_networks_supervised
diff -pruN 0.23.2-5/doc/support.rst 1.1.1-1/doc/support.rst
--- 0.23.2-5/doc/support.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/support.rst	2022-05-19 12:16:26.456783000 +0000
@@ -39,13 +39,15 @@ what you observed instead in the details
 Code and data snippets are welcome. Minimalistic (up to ~20 lines long)
 reproduction script very helpful.
 
-Please describe the nature of your data and the how you preprocessed it:
+Please describe the nature of your data and how you preprocessed it:
 what is the number of samples, what is the number and type of features
 (i.d. categorical or numerical) and for supervised learning tasks,
 what target are your trying to predict: binary, multiclass (1 out of
 ``n_classes``) or multilabel (``k`` out of ``n_classes``) classification
 or continuous variable regression.
 
+User questions should **not be asked on the bug tracker**, as it crowds
+the list of issues and makes the development of the project harder.
 
 .. _bug_tracker:
 
@@ -62,21 +64,21 @@ Don't forget to include:
 
   - expected outcome,
 
-  - observed outcome or python (or gdb) tracebacks
+  - observed outcome or Python (or gdb) tracebacks
 
 To help developers fix your bug faster, please link to a https://gist.github.com
 holding a standalone minimalistic python script that reproduces your bug and
-optionally a minimalistic subsample of your dataset (for instance exported
+optionally a minimalistic subsample of your dataset (for instance, exported
 as CSV files using ``numpy.savetxt``).
 
-Note: gists are git cloneable repositories and thus you can use git to
+Note: Gists are Git cloneable repositories and thus you can use Git to
 push datafiles to them.
 
 
 .. _gitter:
 
 Gitter
-===
+======
 
 Some developers like to hang out on scikit-learn Gitter room:
 https://gitter.im/scikit-learn/scikit-learn.
diff -pruN 0.23.2-5/doc/templates/class.rst 1.1.1-1/doc/templates/class.rst
--- 0.23.2-5/doc/templates/class.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/templates/class.rst	2022-05-19 12:16:26.456783000 +0000
@@ -5,10 +5,6 @@
 
 .. autoclass:: {{ objname }}
 
-   {% block methods %}
-   .. automethod:: __init__
-   {% endblock %}
-
 .. include:: {{module}}.{{objname}}.examples
 
 .. raw:: html
diff -pruN 0.23.2-5/doc/templates/class_with_call.rst 1.1.1-1/doc/templates/class_with_call.rst
--- 0.23.2-5/doc/templates/class_with_call.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/templates/class_with_call.rst	2022-05-19 12:16:26.456783000 +0000
@@ -6,7 +6,6 @@
 .. autoclass:: {{ objname }}
 
    {% block methods %}
-   .. automethod:: __init__
    .. automethod:: __call__
    {% endblock %}
 
diff -pruN 0.23.2-5/doc/templates/class_without_init.rst 1.1.1-1/doc/templates/class_without_init.rst
--- 0.23.2-5/doc/templates/class_without_init.rst	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/templates/class_without_init.rst	1970-01-01 00:00:00.000000000 +0000
@@ -1,12 +0,0 @@
-:mod:`{{module}}`.{{objname}}
-{{ underline }}==============
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ objname }}
-
-.. include:: {{module}}.{{objname}}.examples
-
-.. raw:: html
-
-    <div class="clearer"></div>
diff -pruN 0.23.2-5/doc/templates/documentation.html 1.1.1-1/doc/templates/documentation.html
--- 0.23.2-5/doc/templates/documentation.html	2020-08-04 12:12:58.880675600 +0000
+++ 1.1.1-1/doc/templates/documentation.html	1970-01-01 00:00:00.000000000 +0000
@@ -1,14 +0,0 @@
-<!DOCTYPE html>
-<html>
-  <head>
-    <meta charset="utf-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <meta http-equiv="Refresh" content="0; url={{ pathto('index') }}" />
-    <meta name="Description" content="scikit-learn: machine learning in Python">
-    <link rel="canonical" href="{{ pathto('index') }}" />
-    <title>scikit-learn: machine learning in Python</title>
-  </head>
-  <body>
-    <p>You will be automatically redirected to the <a href="{{ pathto('index') }}">main page</a>.</p>
-  </body>
-</html>
diff -pruN 0.23.2-5/doc/templates/index.html 1.1.1-1/doc/templates/index.html
--- 0.23.2-5/doc/templates/index.html	2020-08-04 12:12:58.884675500 +0000
+++ 1.1.1-1/doc/templates/index.html	2022-05-19 12:16:26.456783000 +0000
@@ -1,5 +1,15 @@
 {% extends "layout.html" %}
 {% set title = 'scikit-learn: machine learning in Python' %}
+
+{% if theme_link_to_live_contributing_page|tobool %}
+  {%- set contributing_link = "https://scikit-learn.org/dev/developers/contributing.html" %}
+  {%- set contributing_attrs = 'target="_blank" rel="noopener noreferrer"' %}
+{%- else %}
+  {%- set contributing_link = pathto('developers/contributing') %}
+  {%- set contributing_attrs = '' %}
+{%- endif %}
+
+
 {% block content %}
 <div class="container-fluid sk-landing-bg py-3">
   <div class="container sk-landing-container">
@@ -53,8 +63,8 @@
           <p class="card-text"><strong>Applications:</strong> Drug response, Stock prices.</br>
           <strong>Algorithms:</strong>
           <a href="modules/svm.html#svm-regression">SVR</a>,
-          <a href="modules/linear_model.html#ridge-regression">nearest neighbors</a>,
-          <a href="modules/linear_model.html#lasso">random forest</a>,
+          <a href="modules/neighbors.html#regression">nearest neighbors</a>,
+          <a href="modules/ensemble.html#forest">random forest</a>,
           and <a href="supervised_learning.html#supervised-learning">more...</a></p>
         </div>
         <div class="overflow-hidden mx-2 text-center flex-fill">
@@ -92,7 +102,7 @@
           <p class="card-text">Reducing the number of random variables to consider.</p>
           <p class="card-text"><strong>Applications:</strong> Visualization, Increased efficiency</br>
           <strong>Algorithms:</strong>
-          <a href="modules/decomposition.html#pca">k-Means</a>,
+          <a href="modules/decomposition.html#pca">PCA</a>,
           <a href="modules/feature_selection.html#feature-selection">feature selection</a>,
           <a href="modules/decomposition.html#nmf">non-negative matrix factorization</a>,
           and <a href="modules/decomposition.html#decompositions">more...</a></p>
@@ -155,42 +165,58 @@
         <ul class="sk-landing-call-list list-unstyled">
         <li><strong>On-going development:</strong>
         <a href="https://scikit-learn.org/dev/whats_new.html"><strong>What's new</strong> (Changelog)</a>
+        </li>
+        <li><strong>May 2022.</strong> scikit-learn 1.1.1 is available for download (<a href="whats_new/v1.1.html#version-1-1-1">Changelog</a>).
+        </li>
+        <li><strong>May 2022.</strong> scikit-learn 1.1.0 is available for download (<a href="whats_new/v1.1.html#version-1-1-0">Changelog</a>).
+        </li>
+        <li><strong>December 2021.</strong> scikit-learn 1.0.2 is available for download (<a href="whats_new/v1.0.html#version-1-0-2">Changelog</a>).
+        </li>
+        <li><strong>October 2021.</strong> scikit-learn 1.0.1 is available for download (<a href="whats_new/v1.0.html#version-1-0-1">Changelog</a>).
+        </li>
+        <li><strong>September 2021.</strong> scikit-learn 1.0 is available for download (<a href="whats_new/v1.0.html#version-1-0">Changelog</a>).
+        </li>
+        <li><strong>April 2021.</strong> scikit-learn 0.24.2 is available for download (<a href="whats_new/v0.24.html#version-0-24-2">Changelog</a>).
+        </li>
+        <li><strong>January 2021.</strong> scikit-learn 0.24.1 is available for download (<a href="whats_new/v0.24.html#version-0-24-1">Changelog</a>).
+        </li>
+        <li><strong>December 2020.</strong> scikit-learn 0.24.0 is available for download (<a href="whats_new/v0.24.html#version-0-24-0">Changelog</a>).
+        </li>
         <li><strong>August 2020.</strong> scikit-learn 0.23.2 is available for download (<a href="whats_new/v0.23.html#version-0-23-2">Changelog</a>).
         </li>
         <li><strong>May 2020.</strong> scikit-learn 0.23.1 is available for download (<a href="whats_new/v0.23.html#version-0-23-1">Changelog</a>).
         </li>
         <li><strong>May 2020.</strong> scikit-learn 0.23.0 is available for download (<a href="whats_new/v0.23.html#version-0-23-0">Changelog</a>).
         </li>
-        <li><strong>Scikit-learn from 0.23 requires Python 3.6 or greater.</strong>
+        <li><strong>Scikit-learn from 0.23 requires Python 3.6 or newer.</strong>
         </li>
         <li><strong>March 2020.</strong> scikit-learn 0.22.2 is available for download (<a href="whats_new/v0.22.html#version-0-22-2">Changelog</a>).
         <li><strong>January 2020.</strong> scikit-learn 0.22.1 is available for download (<a href="whats_new/v0.22.html#version-0-22-1">Changelog</a>).
         <li><strong>December 2019.</strong> scikit-learn 0.22 is available for download (<a href="whats_new/v0.22.html#version-0-22-0">Changelog</a> and <a href="{{ pathto('auto_examples/release_highlights/plot_release_highlights_0_22_0') }}">Release Highlights</a>).
         </li>
-        <li><strong>Scikit-learn from 0.21 requires Python 3.5 or greater.</strong>
-        </li>
-        <li><strong>July 2019.</strong> scikit-learn 0.21.3 (<a href="whats_new/v0.21.html#version-0-21-3">Changelog</a>) and 0.20.4 (<a href="whats_new/v0.20.html#version-0-20-4">Changelog</a>) are available for download.
-        </li>
-        <li><strong>May 2019.</strong> scikit-learn 0.21.0 to 0.21.2 are available for download (<a href="whats_new/v0.21.html#version-0-21-2">Changelog</a>).
-        </li>
         </ul>
       </div>
       <div class="col-md-4">
         <h4 class="sk-landing-call-header">Community</h4>
         <ul class="sk-landing-call-list list-unstyled">
-        <li><strong>About us:</strong> See <a href="about.html#people">authors</a> and <a href="developers/contributing.html">contributing</a></li>
+        <li><strong>About us:</strong> See <a href="about.html#people">authors</a> and <a href="{{ contributing_link }}" {{ contributing_attrs }}>contributing</a></li>
         <li><strong>More Machine Learning:</strong> Find <a href="related_projects.html">related projects</a></li>
         <li><strong>Questions?</strong> See <a href="faq.html">FAQ</a> and <a href="https://stackoverflow.com/questions/tagged/scikit-learn">stackoverflow</a></li>
-        <li><strong>Mailing list:</strong> <a href="https://mail.python.org/mailman/listinfo/scikit-learn">scikit-learn@python.org</a></li>
+        <li><strong>Subscribe to the</strong> <a href="https://mail.python.org/mailman/listinfo/scikit-learn">mailing list</a></li>
         <li><strong>Gitter:</strong> <a href="https://gitter.im/scikit-learn/scikit-learn">gitter.im/scikit-learn</a></li>
+        <li><strong>Blog:</strong> <a href="https://blog.scikit-learn.org">blog.scikit-learn.org</a></li>
+        <li><strong>Calendar:</strong> <a href="https://blog.scikit-learn.org/calendar/">calendar</a></li>
+        <li><strong>Twitter:</strong> <a href="https://twitter.com/scikit_learn">@scikit_learn</a></li>
+        <li><strong>Twitter (commits):</strong> <a href="https://twitter.com/sklearn_commits">@sklearn_commits</a></li>
+        <li><strong>LinkedIn:</strong> <a href="https://www.linkedin.com/company/scikit-learn">linkedin/scikit-learn</a></li>
+        <li><strong>YouTube:</strong> <a href="https://www.youtube.com/channel/UCJosFjYm0ZYVUARxuOZqnnw/playlists">youtube.com/scikit-learn</a></li>
+        <li><strong>Facebook:</strong> <a href="https://www.facebook.com/scikitlearnofficial/">@scikitlearnofficial</a></li>
+        <li><strong>Instagram:</strong> <a href="https://www.instagram.com/scikitlearnofficial/">@scikitlearnofficial</a></li>
+        <li><strong>TikTok:</strong> <a href="https://www.tiktok.com/@scikit.learn">@scikit.learn</a></li>
         <li>Communication on all channels should respect <a href="https://www.python.org/psf/conduct/">PSF's code of conduct.</a></li>
         </ul>
 
-        <form target="_top" id="paypal-form" method="post" action="https://www.paypal.com/cgi-bin/webscr">
-          <input type="hidden" value="_s-xclick" name="cmd">
-          <input type="hidden" value="74EYUMF3FTSW8" name="hosted_button_id">
-        </form>
-        <a class="btn btn-warning btn-big sk-donate-btn mb-1" onclick="document.getElementById('paypal-form').submit(); ">Help us, <strong>donate!</strong></a>
+        <a class="btn btn-warning btn-big sk-donate-btn mb-1" href="https://numfocus.org/donate-to-scikit-learn">Help us, <strong>donate!</strong></a>
         <a class="btn btn-warning btn-big mb-1" href="about.html#citing-scikit-learn"><strong>Cite us!</strong></a>
       </div>
       <div class="col-md-4">
@@ -238,17 +264,15 @@
                   scikit-learn development and maintenance are financially supported by
                 </p>
                 <img class="sk-footer-funding-logo" src="_static/inria-small.png" title="INRIA">
-                <img class="sk-footer-funding-logo" src="_static/columbia-small.png" title="Columbia University">
-                <img class="sk-footer-funding-logo" src="_static/sloan_logo-small.png" title="Alfred P. Sloan Foundation" >
-                <img class="sk-footer-funding-logo" src="_static/sydney-stacked-small.png" title="The University of Sydney">
-                <img class="sk-footer-funding-logo" src="_static/microsoft-small.png" title="Microsoft" >
                 <img class="sk-footer-funding-logo" src="_static/bcg-small.png" title="Boston Consulting Group" >
                 <img class="sk-footer-funding-logo" src="_static/axa-small.png" title="AXA Assurances" >
                 <img class="sk-footer-funding-logo" src="_static/bnp-small.png" title="BNP Paris Bas Cardif" >
                 <img class="sk-footer-funding-logo" src="_static/fujitsu-small.png" title="Fujitsu" >
-                <img class="sk-footer-funding-logo" src="_static/intel-small.png" title="Intel" >
-                <img class="sk-footer-funding-logo" src="_static/nvidia-small.png" title="Nvidia" >
+                <img class="sk-footer-funding-logo" src="_static/microsoft-small.png" title="Microsoft" >
                 <img class="sk-footer-funding-logo" src="_static/dataiku-small.png" title="Dataiku" >
+                <img class="sk-footer-funding-logo" src="_static/logo_APHP.png" title="APHP" >
+                <img class="sk-footer-funding-logo" src="_static/huggingface_logo-noborder.png" title="Hugging Face" >
+                <img class="sk-footer-funding-logo" src="_static/quansight-labs-small.png" title="Quansight Labs" >
         </div>
         </a>
   </div>
diff -pruN 0.23.2-5/doc/templates/redirects.html 1.1.1-1/doc/templates/redirects.html
--- 0.23.2-5/doc/templates/redirects.html	1970-01-01 00:00:00.000000000 +0000
+++ 1.1.1-1/doc/templates/redirects.html	2022-05-19 12:16:26.456783000 +0000
@@ -0,0 +1,15 @@
+{% set redirect = pathto(redirects[pagename]) %}
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <meta http-equiv="Refresh" content="0; url={{ redirect }}" />
+    <meta name="Description" content="scikit-learn: machine learning in Python">
+    <link rel="canonical" href="{{ redirect }}" />
+    <title>scikit-learn: machine learning in Python</title>
+  </head>
+  <body>
+    <p>You will be automatically redirected to the <a href="{{ redirect }}">new location of this page</a>.</p>
+  </body>
+</html>
Binary files 0.23.2-5/doc/testimonials/images/bnp_paribas_cardif.png and 1.1.1-1/doc/testimonials/images/bnp_paribas_cardif.png differ
diff -pruN 0.23.2-5/doc/testimonials/testimonials.rst 1.1.1-1/doc/testimonials/testimonials.rst
--- 0.23.2-5/doc/testimonials/testimonials.rst	2020-08-04 12:12:58.884675500 +0000
+++ 1.1.1-1/doc/testimonials/testimonials.rst	2022-05-19 12:16:26.460783200 +0000
@@ -1108,3 +1108,44 @@ Michael Fitzke Next Generation Technolog
    </div>
    </div>
 
+
+`BNP Paribas Cardif <https://www.bnpparibascardif.com/>`_
+---------------------------------------------------------
+
+.. raw:: html
+
+   <div class="sk-testimonial-div">
+   <div class="sk-testimonial-div-box">
+
+BNP Paribas Cardif uses scikit-learn for several of its machine learning models
+in production. Our internal community of developers and data scientists has
+been using scikit-learn since 2015, for several reasons: the quality of the
+developments, documentation and contribution governance, and the sheer size of
+the contributing community. We even explicitly mention the use of
+scikit-learn's pipelines in our internal model risk governance as one of our
+good practices to decrease operational risks and overfitting risk. As a way to
+support open source software development and in particular scikit-learn
+project, we decided to participate to scikit-learn's consortium at La Fondation
+Inria since its creation in 2018.
+
+.. raw:: html
+
+   <span class="testimonial-author">
+
+Sébastien Conort, Chief Data Scientist, BNP Paribas Cardif
+
+.. raw:: html
+
+   </span>
+   </div>
+   <div class="sk-testimonial-div-box">
+
+.. image:: images/bnp_paribas_cardif.png
+    :width: 120pt
+    :align: center
+    :target: https://www.bnpparibascardif.com/
+
+.. raw:: html
+
+   </div>
+   </div>
diff -pruN 0.23.2-5/doc/themes/scikit-learn/layout.html 1.1.1-1/doc/themes/scikit-learn/layout.html
--- 0.23.2-5/doc/themes/scikit-learn/layout.html	2020-08-04 12:12:58.888675500 +0000
+++ 1.1.1-1/doc/themes/scikit-learn/layout.html	1970-01-01 00:00:00.000000000 +0000
@@ -1,361 +0,0 @@
-{#
-    scikit-learn/layout.html
-    ~~~~~~~~~~~~~~~~~
-
-    Layout for scikit-learn, after a design made by Angel Soler
-    (http://angelsoler.net)
-
-    Update: Next-page button added - 16/03/2012 - Jaques Grobler
-
-
-    :copyright: Fabian Pedregosa
-    :license: BSD
-#}
-{% extends "basic/layout.html" %}
-
-{% block htmltitle %}
-  {{ super() }}
-  <!-- htmltitle is before nature.css - we use this hack to load bootstrap first -->
-  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-  <link rel="stylesheet" href="{{ pathto('_static/css/bootstrap.min.css', 1) }}" media="screen" />
-  <link rel="stylesheet" href="{{ pathto('_static/css/bootstrap-responsive.css', 1) }}"/>
-{% endblock %}
-
-{% block extrahead %}
-  {% if pagename != 'index' %}
-  {% endif %}
-  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-  <script src="{{ pathto('_static/js/bootstrap.min.js', 1)}}" type="text/javascript"></script>
-  <script>
-     VERSION_SUBDIR = (function(groups) {
-         return groups ? groups[1] : null;
-     })(location.href.match(/^https?:\/\/scikit-learn.org\/([^\/]+)/));
-  </script>
-  <link rel="canonical" href="http://scikit-learn.org/stable/{{pagename}}.html" />
-
-  <script type="text/javascript">
-    $("div.buttonNext, div.buttonPrevious").hover(
-       function () {
-           $(this).css('background-color', '#FF9C34');
-       },
-       function () {
-           $(this).css('background-color', '#A7D6E2');
-       }
-    );
-    function showMenu() {
-      var topNav = document.getElementById("scikit-navbar");
-      if (topNav.className === "navbar") {
-          topNav.className += " responsive";
-      } else {
-          topNav.className = "navbar";
-      }
-    };
-  </script>
-{% endblock %}
-
-{%- if pagename == 'index' %}
-    {% set title = 'scikit-learn: machine learning in Python' %}
-{%- endif %}
-
-{%- if pagename == 'documentation' %}
-    {% set title = 'Documentation scikit-learn: machine learning in Python' %}
-{%- endif %}
-
-{% block header %}
-
-<div class="header-wrapper">
-    <div class="header">
-        {%- if logo %}
-        <p class="logo"><a href="{{ pathto(master_doc) }}">
-            <img src="{{ pathto('_static/' + logo, 1) }}" alt="Logo"/>
-        </a>
-        </p>
-        {%- endif %}
-
-        {%- block navbar -%}
-        <div class="navbar" id="scikit-navbar">
-            <ul>
-                <li><a href="{{pathto('index')}}">Home</a></li>
-                <li><a href="{{pathto('install')}}">Installation</a></li>
-                <li class="btn-li"><div class="btn-group">
-              <a href="{{pathto('documentation')}}">Documentation</a>
-              <a class="btn dropdown-toggle" data-toggle="dropdown">
-                 <span class="caret"></span>
-              </a>
-              <ul class="dropdown-menu">
-            <li class="link-title">Scikit-learn <script>document.write(DOCUMENTATION_OPTIONS.VERSION + (VERSION_SUBDIR ? " (" + VERSION_SUBDIR + ")" : ""));</script></li>
-            <li><a href="{{ pathto('tutorial/index') }}">Tutorials</a></li>
-            <li><a href="{{ pathto('user_guide') }}">User guide</a></li>
-            <li><a href="{{ pathto('modules/classes') }}">API</a></li>
-            <li><a href="{{ pathto('glossary') }}">Glossary</a></li>
-            <li><a href="{{ pathto('faq') }}">FAQ</a></li>
-            <li><a href="{{ pathto('developers/index') }}">Development</a></li>
-            <li><a href="{{ pathto('roadmap') }}">Roadmap</a></li>
-            <li><a href="{{ pathto('about') }}">About us</a></li>
-            <li class="divider"></li>
-                <script>if (VERSION_SUBDIR != "stable") document.write('<li><a href="http://scikit-learn.org/stable/documentation.html">Stable version</a></li>')</script>
-                <script>if (VERSION_SUBDIR != "dev") document.write('<li><a href="http://scikit-learn.org/dev/documentation.html">Development version</a></li>')</script>
-                <li><a href="http://scikit-learn.org/dev/versions.html">All available versions</a></li>
-                <li><a href="{{ pathto('_downloads/scikit-learn-docs.pdf', 1) }}">PDF documentation</a></li>
-              </ul>
-            </div>
-        </li>
-            <li><a href="{{pathto('auto_examples/index')}}">Examples</a></li>
-            </ul>
-            <a href="javascript:void(0);" onclick="showMenu()">
-                <div class="nav-icon">
-                    <div class="hamburger-line"></div>
-                    <div class="hamburger-line"></div>
-                    <div class="hamburger-line"></div>
-                </div>
-            </a>
-            <div class="search_form">
-                <div class="gcse-search" id="cse" style="width: 100%;"></div>
-            </div>
-        </div> <!-- end navbar -->
-
-        {%- endblock -%}
-
-    </div>
-</div>
-
-
-{%- if pagename == 'index' %}
-<!-- Banner -->
-<div class="container banner-container">
-  <div class="row-fluid banner-inner">
-    <div class="hidden-phone">
-      <div class="span6">
-	<div class="row-fluid">
-          <div class="offset2 span8"><div id="index_carousel_tn" class="thumbnail">
-	      <div id="examples_carousel" class="carousel slide" data-interval="false">
-		<ol id="scikit_learn_index_indicators" class="carousel-indicators">
-		  <li data-target="#examples_carousel" data-slide-to="0" class="active"></li>
-		  <li data-target="#examples_carousel" data-slide-to="1"></li>
-		  <li data-target="#examples_carousel" data-slide-to="2"></li>
-		  <li data-target="#examples_carousel" data-slide-to="3"></li>
-		  <li data-target="#examples_carousel" data-slide-to="4"></li>
-		  <li data-target="#examples_carousel" data-slide-to="5"></li>
-		  <li data-target="#examples_carousel" data-slide-to="6"></li>
-		  <li data-target="#examples_carousel" data-slide-to="7"></li>
-		  <li data-target="#examples_carousel" data-slide-to="8"></li>
-		  <li data-target="#examples_carousel" data-slide-to="9"></li>
-		</ol>
-		<!-- Carousel items -->
-		<div class="carousel-inner">
-		  <div class="active item">
-		    <a href="{{pathto('auto_examples/classification/plot_classifier_comparison') }}">
-		      <div class="crop-wrapper" style="width: 380px; height: 190px; overflow: hidden">
-			<img src="_images/sphx_glr_plot_classifier_comparison_001_carousel.png"
-			     style="max-height: 200px; max-width: 629px; margin-left: -21px;"></div></a>
-		  </div>
-		  <div class="item">
-		    <a href="{{pathto('auto_examples/plot_anomaly_comparison') }}">
-		      <img src="_images/sphx_glr_plot_anomaly_comparison_thumb.png"></a>
-		  </div>
-		  <div class="item">
-		    <a href="{{pathto('auto_examples/neighbors/plot_species_kde') }}">
-		      <img src="_images/sphx_glr_plot_species_kde_thumb.png"></a>
-		  </div>
-		  <div class="item">
-		    <a href="{{pathto('auto_examples/linear_model/plot_lasso_lars') }}">
-		      <img src="_images/sphx_glr_plot_lasso_lars_thumb.png"></a>
-		  </div>
-		  <div class="item">
-		    <a href="{{ pathto('auto_examples/cluster/plot_cluster_comparison') }}">
-		      <img src="_images/sphx_glr_plot_cluster_comparison_thumb.png"></a>
-		  </div>
-		  <div class="item">
-		    <a href="{{ pathto('auto_examples/ensemble/plot_adaboost_twoclass') }}">
-		      <img src="_images/sphx_glr_plot_adaboost_twoclass_001_carousel.png"></a>
-		  </div>
-		  <div class="item">
-		    <a href="{{pathto('auto_examples/gaussian_process/plot_gpr_co2') }}">
-		      <img src="_images/sphx_glr_plot_gpr_co2_001_carousel.png"></a>
-		  </div>
-		  <div class="item">
-		    <a href="{{ pathto('auto_examples/manifold/plot_compare_methods') }}">
-		      <img src="_images/sphx_glr_plot_compare_methods_001_carousel.png"></a>
-		  </div>
-		  <div class="item">
-		    <a href="{{ pathto('auto_examples/mixture/plot_gmm_pdf') }}">
-		      <img src="_images/sphx_glr_plot_gmm_pdf_thumb.png"></a>
-		  </div>
-		  <div class="item">
-		    <a href="{{ pathto('auto_examples/cluster/plot_coin_ward_segmentation') }}">
-		      <img src="_images/sphx_glr_plot_coin_ward_segmentation_thumb.png"></a>
-		  </div>
-		</div>
-		<!-- Carousel nav -->
-		<a class="carousel-control left" href="#examples_carousel" data-slide="prev">&lsaquo;</a>
-		<a class="carousel-control right" href="#examples_carousel" data-slide="next">&rsaquo;</a>
-	      </div>
-	  </div></div>
-	</div>
-      </div>
-    </div>
-    <div id="intro_to_sklearn_p" class="span6">
-      <h1>scikit-learn</h1>
-      <h2>Machine Learning in Python</h2>
-      <ul>
-        <li>Simple and efficient tools for data mining and data analysis</li>
-        <li>Accessible to everybody, and reusable in various contexts</li>
-        <li>Built on NumPy, SciPy, and matplotlib</li>
-        <li>Open source, commercially usable - BSD license</li>
-      </ul>
-    </div>
-  </div>
-</div>
-{%- endif %}
-
-{% endblock %}
-
-{% block content %}
-<!-- GitHub "fork me" ribbon -->
-<a href="https://github.com/scikit-learn/scikit-learn">
-  <img class="fork-me"
-       style="position: absolute; top: 0; right: 0; border: 0;"
-       src="{{ pathto('_static/img/forkme.png', 1) }}"
-       alt="Fork me on GitHub" />
-</a>
-
-<div class="content-wrapper">
-
-{%- if pagename != 'index' and pagename != 'documentation' %}
-    <div class="sphinxsidebar">
-    <div class="sphinxsidebarwrapper">
-
-    {%- if rellinks %}
-
-    {%- if parents %}
-        <div class="rel">
-    {% else %}
-        <div class="rel rellarge">
-    {% endif %}
-
-    {%- for rellink in rellinks|reverse %}
-        <div class="rellink">
-        <a href="{{ pathto(rellink[0]) }}"
-        {{ accesskey(rellink[2]) }}>{{ rellink[3]|capitalize }}
-        <br/>
-        <span class="smallrellink">
-        {{ rellink[1]|striptags|truncate(16, killwords=True) }}
-        </span>
-        {%- if rellink[1]|striptags %}
-            <span class="hiddenrellink">
-            {{ rellink[1]|striptags}}
-            </span>
-        {%- endif %}
-        </a>
-        </div>
-        {%- if not loop.last %}
-            <div class="spacer">
-            &nbsp;
-            </div>
-        {%- endif %}
-    {%- endfor %}
-
-    <!-- Ad a link to the 'up' page -->
-
-    {%- if parents %}
-        <div class="spacer">
-        &nbsp;
-        </div>
-        <div class="rellink">
-        <a href="{{ parents[-1].link|e }}">
-        Up
-        <br/>
-        <span class="smallrellink">
-        {{ parents[-1].title|striptags|truncate(16, killwords=True) }}
-        </span>
-        {%- if parents[-1].title|striptags != '<no title>' %}
-            <span class="hiddenrellink">
-            {{ parents[-1].title|striptags }}
-            </span>
-            {% endif %}
-        </a>
-        </div>
-    {%- endif %}
-    </div>
-    {% endif %}
-
-    {%- if pagename != "install" %}
-      <p class="doc-version"><b>{{project}} v{{ release|e }}</b><br/>
-      <a href="http://scikit-learn.org/dev/versions.html">Other versions</a></p>
-    {%- endif %}
-    <p class="citing">Please <b><a href="{{ pathto('about').replace('#', '') }}#citing-scikit-learn" style="font-size: 110%;">cite us </a></b>if you use the software.</p>
-    {{ toc }}
-    </div>
-</div>
-{% if theme_collapsiblesidebar|tobool %}
-<input type="checkbox" id="nav-trigger" class="nav-trigger" checked />
-<label for="nav-trigger"></label>
-{% endif %}
-{% endif %}
-
-
-      <div class="content">
-          {%- block document %}
-            {{ super() }}
-          {%- endblock %}
-        <div class="clearer"></div>
-      </div>
-    </div>
-{% endblock %}
-
-{% block relbar1 %}{% endblock %}
-{% block relbar2 %}{% endblock %}
-
-
-{%- block footer %}
-    <div class="footer">
-    {%- if pagename != 'index' %}
-     {%- if show_copyright %}
-      {%- if hasdoc('copyright') %}
-        {% trans path=pathto('copyright'), copyright=copyright|e %}&copy; {{ copyright }}.{% endtrans %}
-      {%- else %}
-        {% trans copyright=copyright|e %}&copy; {{ copyright }}.{% endtrans %}
-      {%- endif %}
-     {%- endif %}
-     {%- if last_updated %}
-      {% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %}
-     {%- endif %}
-     {%- if show_source and has_source and sourcename %}
-      <a href="{{ pathto('_sources/' + sourcename, true)|e }}" rel="nofollow">{{ _('Show this page source') }}</a>
-     {%- endif %}
-    {%- endif %}
-    </div>
-
-    {%- if pagename != 'index' %}
-    {%- if parents %}
-     <div class="rel">
-    {% else %}
-     <div class="rel rellarge">
-    {% endif %}
-    {%- for rellink in rellinks|reverse %}
-    <div class="{{ loop.cycle('buttonPrevious', 'buttonNext') }}">
-      <a href="{{ pathto(rellink[0]) }}">{{ loop.cycle('Previous', 'Next') }}
-      </a>
-    </div>
-    {%- endfor %}
-    {% endif %}
-     </div>
-
-    {% if theme_google_analytics|tobool %}
-    <script>
-        window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
-        ga('create', 'UA-22606712-2', 'auto');
-        ga('set', 'anonymizeIp', true);
-        ga('send', 'pageview');
-    </script>
-    <script async src='https://www.google-analytics.com/analytics.js'></script>
-    {% endif %}
-    <script>
-      (function() {
-        var cx = '016639176250731907682:tjtqbvtvij0';
-        var gcse = document.createElement('script'); gcse.type = 'text/javascript'; gcse.async = true;
-        gcse.src = 'https://cse.google.com/cse.js?cx=' + cx;
-        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(gcse, s);
-      })();
-    </script>
-{%- endblock %}
diff -pruN 0.23.2-5/doc/themes/scikit-learn/static/css/bootstrap.css 1.1.1-1/doc/themes/scikit-learn/static/css/bootstrap.css
--- 0.23.2-5/doc/themes/scikit-learn/static/css/bootstrap.css	2020-08-04 12:12:58.888675500 +0000
+++ 1.1.1-1/doc/themes/scikit-learn/static/css/bootstrap.css	1970-01-01 00:00:00.000000000 +0000
@@ -1,6320 +0,0 @@
-/*!
- * Bootstrap v2.3.2
- *
- * Copyright 2012 Twitter, Inc
- * Licensed under the Apache License v2.0
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Designed and built with all the love in the world @twitter by @mdo and @fat.
- */
-.clearfix {
-  *zoom: 1;
-}
-.clearfix:before,
-.clearfix:after {
-  display: table;
-  content: "";
-  line-height: 0;
-}
-.clearfix:after {
-  clear: both;
-}
-.hide-text {
-  font: 0/0 a;
-  color: transparent;
-  text-shadow: none;
-  background-color: transparent;
-  border: 0;
-}
-.input-block-level {
-  display: block;
-  width: 100%;
-  min-height: 30px;
-  -webkit-box-sizing: border-box;
-  -moz-box-sizing: border-box;
-  box-sizing: border-box;
-}
-article,
-aside,
-details,
-figcaption,
-figure,
-footer,
-header,
-hgroup,
-nav,
-section {
-  display: block;
-}
-audio,
-canvas,
-video {
-  display: inline-block;
-  *display: inline;
-  *zoom: 1;
-}
-audio:not([controls]) {
-  display: none;
-}
-html {
-  font-size: 100%;
-  -webkit-text-size-adjust: 100%;
-  -ms-text-size-adjust: 100%;
-}
-a:focus {
-  outline: thin dotted #333;
-  outline: 5px auto -webkit-focus-ring-color;
-  outline-offset: -2px;
-}
-a:hover,
-a:active {
-  outline: 0;
-}
-sub,
-sup {
-  position: relative;
-  font-size: 75%;
-  line-height: 0;
-  vertical-align: baseline;
-}
-sup {
-  top: -0.5em;
-}
-sub {
-  bottom: -0.25em;
-}
-img {
-  /* Responsive images (ensure images don't scale beyond their parents) */
-
-  max-width: 100%;
-  /* Part 1: Set a maximum relative to the parent */
-
-  width: auto\9;
-  /* IE7-8 need help adjusting responsive images */
-
-  height: auto;
-  /* Part 2: Scale the height according to the width, otherwise you get stretching */
-
-  vertical-align: middle;
-  border: 0;
-  -ms-interpolation-mode: bicubic;
-}
-#map_canvas img,
-.google-maps img {
-  max-width: none;
-}
-button,
-input,
-select,
-textarea {
-  margin: 0;
-  font-size: 100%;
-  vertical-align: middle;
-}
-button,
-input {
-  *overflow: visible;
-  line-height: normal;
-}
-button::-moz-focus-inner,
-input::-moz-focus-inner {
-  padding: 0;
-  border: 0;
-}
-button,
-html input[type="button"],
-input[type="reset"],
-input[type="submit"] {
-  -webkit-appearance: button;
-  cursor: pointer;
-}
-label,
-select,
-button,
-input[type="button"],
-input[type="reset"],
-input[type="submit"],
-input[type="radio"],
-input[type="checkbox"] {
-  cursor: pointer;
-}
-input[type="search"] {
-  -webkit-box-sizing: content-box;
-  -moz-box-sizing: content-box;
-  box-sizing: content-box;
-  -webkit-appearance: textfield;
-}
-input[type="search"]::-webkit-search-decoration,
-input[type="search"]::-webkit-search-cancel-button {
-  -webkit-appearance: none;
-}
-textarea {
-  overflow: auto;
-  vertical-align: top;
-}
-@media print {
-  * {
-    text-shadow: none !important;
-    color: #000 !important;
-    background: transparent !important;
-    box-shadow: none !important;
-  }
-  a,
-  a:visited {
-    text-decoration: underline;
-  }
-  a[href]:after {
-    content: " (" attr(href) ")";
-  }
-  abbr[title]:after {
-    content: " (" attr(title) ")";
-  }
-  .ir a:after,
-  a[href^="javascript:"]:after,
-  a[href^="#"]:after {
-    content: "";
-  }
-  pre,
-  blockquote {
-    border: 1px solid #999;
-    page-break-inside: avoid;
-  }
-  thead {
-    display: table-header-group;
-  }
-  tr,
-  img {
-    page-break-inside: avoid;
-  }
-  img {
-    max-width: 100% !important;
-  }
-  @page  {
-    margin: 0.5cm;
-  }
-  p,
-  h2,
-  h3 {
-    orphans: 3;
-    widows: 3;
-  }
-  h2,
-  h3 {
-    page-break-after: avoid;
-  }
-}
-body {
-  margin: 0;
-  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
-  font-size: 14px;
-  line-height: 20px;
-  color: #333333;
-  background-color: #ffffff;
-}
-a {
-  color: #0088cc;
-  text-decoration: none;
-}
-a:hover,
-a:focus {
-  color: #005580;
-  text-decoration: underline;
-}
-.img-rounded {
-  -webkit-border-radius: 6px;
-  -moz-border-radius: 6px;
-  border-radius: 6px;
-}
-.img-polaroid {
-  padding: 4px;
-  background-color: #fff;
-  border: 1px solid #ccc;
-  border: 1px solid rgba(0, 0, 0, 0.2);
-  -webkit-box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
-  -moz-box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
-  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
-}
-.img-circle {
-  -webkit-border-radius: 500px;
-  -moz-border-radius: 500px;
-  border-radius: 500px;
-}
-.row {
-  margin-left: -20px;
-  *zoom: 1;
-}
-.row:before,
-.row:after {
-  display: table;
-  content: "";
-  line-height: 0;
-}
-.row:after {
-  clear: both;
-}
-[class*="span"] {
-  float: left;
-  min-height: 1px;
-  margin-left: 20px;
-}
-.container,
-.navbar-static-top .container,
-.navbar-fixed-top .container,
-.navbar-fixed-bottom .container {
-  width: 940px;
-}
-.span12 {
-  width: 940px;
-}
-.span11 {
-  width: 860px;
-}
-.span10 {
-  width: 780px;
-}
-.span9 {
-  width: 700px;
-}
-.span8 {
-  width: 620px;
-}
-.span7 {
-  width: 540px;
-}
-.span6 {
-  width: 460px;
-}
-.span5 {
-  width: 380px;
-}
-.span4 {
-  width: 300px;
-}
-.span3 {
-  width: 220px;
-}
-.span2 {
-  width: 140px;
-}
-.span1 {
-  width: 60px;
-}
-.offset12 {
-  margin-left: 980px;
-}
-.offset11 {
-  margin-left: 900px;
-}
-.offset10 {
-  margin-left: 820px;
-}
-.offset9 {
-  margin-left: 740px;
-}
-.offset8 {
-  margin-left: 660px;
-}
-.offset7 {
-  margin-left: 580px;
-}
-.offset6 {
-  margin-left: 500px;
-}
-.offset5 {
-  margin-left: 420px;
-}
-.offset4 {
-  margin-left: 340px;
-}
-.offset3 {
-  margin-left: 260px;
-}
-.offset2 {
-  margin-left: 180px;
-}
-.offset1 {
-  margin-left: 100px;
-}
-.row-fluid {
-  width: 100%;
-  *zoom: 1;
-}
-.row-fluid:before,
-.row-fluid:after {
-  display: table;
-  content: "";
-  line-height: 0;
-}
-.row-fluid:after {
-  clear: both;
-}
-.row-fluid [class*="span"] {
-  display: block;
-  width: 100%;
-  min-height: 30px;
-  -webkit-box-sizing: border-box;
-  -moz-box-sizing: border-box;
-  box-sizing: border-box;
-  float: left;
-  margin-left: 2.127659574468085%;
-  *margin-left: 2.074468085106383%;
-}
-.row-fluid [class*="span"]:first-child {
-  margin-left: 0;
-}
-.row-fluid .controls-row [class*="span"] + [class*="span"] {
-  margin-left: 2.127659574468085%;
-}
-.row-fluid .span12 {
-  width: 100%;
-  *width: 99.94680851063829%;
-}
-.row-fluid .span11 {
-  width: 91.48936170212765%;
-  *width: 91.43617021276594%;
-}
-.row-fluid .span10 {
-  width: 82.97872340425532%;
-  *width: 82.92553191489361%;
-}
-.row-fluid .span9 {
-  width: 74.46808510638297%;
-  *width: 74.41489361702126%;
-}
-.row-fluid .span8 {
-  width: 65.95744680851064%;
-  *width: 65.90425531914893%;
-}
-.row-fluid .span7 {
-  width: 57.44680851063829%;
-  *width: 57.39361702127659%;
-}
-.row-fluid .span6 {
-  width: 48.93617021276595%;
-  *width: 48.88297872340425%;
-}
-.row-fluid .span5 {
-  width: 40.42553191489362%;
-  *width: 40.37234042553192%;
-}
-.row-fluid .span4 {
-  width: 31.914893617021278%;
-  *width: 31.861702127659576%;
-}
-.row-fluid .span3 {
-  width: 23.404255319148934%;
-  *width: 23.351063829787233%;
-}
-.row-fluid .span2 {
-  width: 14.893617021276595%;
-  *width: 14.840425531914894%;
-}
-.row-fluid .span1 {
-  width: 6.382978723404255%;
-  *width: 6.329787234042553%;
-}
-.row-fluid .offset12 {
-  margin-left: 104.25531914893617%;
-  *margin-left: 104.14893617021275%;
-}
-.row-fluid .offset12:first-child {
-  margin-left: 102.12765957446808%;
-  *margin-left: 102.02127659574467%;
-}
-.row-fluid .offset11 {
-  margin-left: 95.74468085106382%;
-  *margin-left: 95.6382978723404%;
-}
-.row-fluid .offset11:first-child {
-  margin-left: 93.61702127659574%;
-  *margin-left: 93.51063829787232%;
-}
-.row-fluid .offset10 {
-  margin-left: 87.23404255319149%;
-  *margin-left: 87.12765957446807%;
-}
-.row-fluid .offset10:first-child {
-  margin-left: 85.1063829787234%;
-  *margin-left: 84.99999999999999%;
-}
-.row-fluid .offset9 {
-  margin-left: 78.72340425531914%;
-  *margin-left: 78.61702127659572%;
-}
-.row-fluid .offset9:first-child {
-  margin-left: 76.59574468085106%;
-  *margin-left: 76.48936170212764%;
-}
-.row-fluid .offset8 {
-  margin-left: 70.2127659574468%;
-  *margin-left: 70.10638297872339%;
-}
-.row-fluid .offset8:first-child {
-  margin-left: 68.08510638297872%;
-  *margin-left: 67.9787234042553%;
-}
-.row-fluid .offset7 {
-  margin-left: 61.70212765957446%;
-  *margin-left: 61.59574468085106%;
-}
-.row-fluid .offset7:first-child {
-  margin-left: 59.574468085106375%;
-  *margin-left: 59.46808510638297%;
-}
-.row-fluid .offset6 {
-  margin-left: 53.191489361702125%;
-  *margin-left: 53.085106382978715%;
-}
-.row-fluid .offset6:first-child {
-  margin-left: 51.063829787234035%;
-  *margin-left: 50.95744680851063%;
-}
-.row-fluid .offset5 {
-  margin-left: 44.68085106382979%;
-  *margin-left: 44.57446808510638%;
-}
-.row-fluid .offset5:first-child {
-  margin-left: 42.5531914893617%;
-  *margin-left: 42.4468085106383%;
-}
-.row-fluid .offset4 {
-  margin-left: 36.170212765957444%;
-  *margin-left: 36.06382978723405%;
-}
-.row-fluid .offset4:first-child {
-  margin-left: 34.04255319148936%;
-  *margin-left: 33.93617021276596%;
-}
-.row-fluid .offset3 {
-  margin-left: 27.659574468085104%;
-  *margin-left: 27.5531914893617%;
-}
-.row-fluid .offset3:first-child {
-  margin-left: 25.53191489361702%;
-  *margin-left: 25.425531914893618%;
-}
-.row-fluid .offset2 {
-  margin-left: 19.148936170212764%;
-  *margin-left: 19.04255319148936%;
-}
-.row-fluid .offset2:first-child {
-  margin-left: 17.02127659574468%;
-  *margin-left: 16.914893617021278%;
-}
-.row-fluid .offset1 {
-  margin-left: 10.638297872340425%;
-  *margin-left: 10.53191489361702%;
-}
-.row-fluid .offset1:first-child {
-  margin-left: 8.51063829787234%;
-  *margin-left: 8.404255319148938%;
-}
-[class*="span"].hide,
-.row-fluid [class*="span"].hide {
-  display: none;
-}
-[class*="span"].pull-right,
-.row-fluid [class*="span"].pull-right {
-  float: right;
-}
-.container {
-  margin-right: auto;
-  margin-left: auto;
-  *zoom: 1;
-}
-.container:before,
-.container:after {
-  display: table;
-  content: "";
-  line-height: 0;
-}
-.container:after {
-  clear: both;
-}
-.container-fluid {
-  padding-right: 20px;
-  padding-left: 20px;
-  *zoom: 1;
-}
-.container-fluid:before,
-.container-fluid:after {
-  display: table;
-  content: "";
-  line-height: 0;
-}
-.container-fluid:after {
-  clear: both;
-}
-p {
-  margin: 0 0 10px;
-}
-.lead {
-  margin-bottom: 20px;
-  font-size: 21px;
-  font-weight: 200;
-  line-height: 30px;
-}
-small {
-  font-size: 85%;
-}
-strong {
-  font-weight: bold;
-}
-em {
-  font-style: italic;
-}
-cite {
-  font-style: normal;
-}
-.muted {
-  color: #999999;
-}
-a.muted:hover,
-a.muted:focus {
-  color: #808080;
-}
-.text-warning {
-  color: #c09853;
-}
-a.text-warning:hover,
-a.text-warning:focus {
-  color: #a47e3c;
-}
-.text-error {
-  color: #b94a48;
-}
-a.text-error:hover,
-a.text-error:focus {
-  color: #953b39;
-}
-.text-info {
-  color: #3a87ad;
-}
-a.text-info:hover,
-a.text-info:focus {
-  color: #2d6987;
-}
-.text-success {
-  color: #468847;
-}
-a.text-success:hover,
-a.text-success:focus {
-  color: #356635;
-}
-.text-left {
-  text-align: left;
-}
-.text-right {
-  text-align: right;
-}
-.text-center {
-  text-align: center;
-}
-h1,
-h2,
-h3,
-h4,
-h5,
-h6 {
-  margin: 10px 0;
-  font-family: inherit;
-  font-weight: bold;
-  line-height: 20px;
-  color: inherit;
-  text-rendering: optimizelegibility;
-}
-h1 small,
-h2 small,
-h3 small,
-h4 small,
-h5 small,
-h6 small {
-  font-weight: normal;
-  line-height: 1;
-  color: #999999;
-}
-h1,
-h2,
-h3 {
-  line-height: 40px;
-}
-h1 {
-  font-size: 38.5px;
-}
-h2 {
-  font-size: 31.5px;
-}
-h3 {
-  font-size: 24.5px;
-}
-h4 {
-  font-size: 17.5px;
-}
-h5 {
-  font-size: 14px;
-}
-h6 {
-  font-size: 11.9px;
-}
-h1 small {
-  font-size: 24.5px;
-}
-h2 small {
-  font-size: 17.5px;
-}
-h3 small {
-  font-size: 14px;
-}
-h4 small {
-  font-size: 14px;
-}
-.page-header {
-  padding-bottom: 9px;
-  margin: 20px 0 30px;
-  border-bottom: 1px solid #eeeeee;
-}
-ul,
-ol {
-  padding: 0;
-  margin: 0 0 10px 25px;
-}
-ul ul,
-ul ol,
-ol ol,
-ol ul {
-  margin-bottom: 0;
-}
-li {
-  line-height: 20px;
-}
-ul.unstyled,
-ol.unstyled {
-  margin-left: 0;
-  list-style: none;
-}
-ul.inline,
-ol.inline {
-  margin-left: 0;
-  list-style: none;
-}
-ul.inline > li,
-ol.inline > li {
-  display: inline-block;
-  *display: inline;
-  /* IE7 inline-block hack */
-
-  *zoom: 1;
-  padding-left: 5px;
-  padding-right: 5px;
-}
-dl {
-  margin-bottom: 20px;
-}
-dt,
-dd {
-  line-height: 20px;
-}
-dt {
-  font-weight: bold;
-}
-dd {
-  margin-left: 10px;
-}
-.dl-horizontal {
-  *zoom: 1;
-}
-.dl-horizontal:before,
-.dl-horizontal:after {
-  display: table;
-  content: "";
-  line-height: 0;
-}
-.dl-horizontal:after {
-  clear: both;
-}
-.dl-horizontal dt {
-  float: left;
-  width: 160px;
-  clear: left;
-  text-align: right;
-  overflow: hidden;
-  text-overflow: ellipsis;
-  white-space: nowrap;
-}
-.dl-horizontal dd {
-  margin-left: 180px;
-}
-hr {
-  margin: 20px 0;
-  border: 0;
-  border-top: 1px solid #eeeeee;
-  border-bottom: 1px solid #ffffff;
-}
-abbr[title],
-abbr[data-original-title] {
-  cursor: help;
-  border-bottom: 1px dotted #999999;
-}
-abbr.initialism {
-  font-size: 90%;
-  text-transform: uppercase;
-}
-blockquote {
-  padding: 0 0 0 15px;
-  margin: 0 0 20px;
-  border-left: 5px solid #eeeeee;
-}
-blockquote p {
-  margin-bottom: 0;
-  font-size: 17.5px;
-  font-weight: 300;
-  line-height: 1.25;
-}
-blockquote small {
-  display: block;
-  line-height: 20px;
-  color: #999999;
-}
-blockquote small:before {
-  content: '\2014 \00A0';
-}
-blockquote.pull-right {
-  float: right;
-  padding-right: 15px;
-  padding-left: 0;
-  border-right: 5px solid #eeeeee;
-  border-left: 0;
-}
-blockquote.pull-right p,
-blockquote.pull-right small {
-  text-align: right;
-}
-blockquote.pull-right small:before {
-  content: '';
-}
-blockquote.pull-right small:after {
-  content: '\00A0 \2014';
-}
-q:before,
-q:after,
-blockquote:before,
-blockquote:after {
-  content: "";
-}
-address {
-  display: block;
-  margin-bottom: 20px;
-  font-style: normal;
-  line-height: 20px;
-}
-code,
-pre {
-  padding: 0 3px 2px;
-  font-family: Monaco, Menlo, Consolas, "Courier New", monospace;
-  font-size: 12px;
-  color: #333333;
-  -webkit-border-radius: 3px;
-  -moz-border-radius: 3px;
-  border-radius: 3px;
-}
-code {
-  padding: 2px 4px;
-  color: #d14;
-  background-color: #f7f7f9;
-  border: 1px solid #e1e1e8;
-  white-space: nowrap;
-}
-pre {
-  display: block;
-  padding: 9.5px;
-  margin: 0 0 10px;
-  font-size: 13px;
-  line-height: 20px;
-  word-break: break-all;
-  word-wrap: break-word;
-  white-space: pre;
-  white-space: pre-wrap;
-  background-color: #f5f5f5;
-  border: 1px solid #ccc;
-  border: 1px solid rgba(0, 0, 0, 0.15);
-  -webkit-border-radius: 4px;
-  -moz-border-radius: 4px;
-  border-radius: 4px;
-}
-pre.prettyprint {
-  margin-bottom: 20px;
-}
-pre code {
-  padding: 0;
-  color: inherit;
-  white-space: pre;
-  white-space: pre-wrap;
-  background-color: transparent;
-  border: 0;
-}
-.pre-scrollable {
-  max-height: 340px;
-  overflow-y: scroll;
-}
-.label,
-.badge {
-  display: inline-block;
-  padding: 2px 4px;
-  font-size: 11.844px;
-  font-weight: bold;
-  line-height: 14px;
-  color: #ffffff;
-  vertical-align: baseline;
-  white-space: nowrap;
-  text-shadow: 0 -1px 0 rgba(0, 0, 0, 0.25);
-  background-color: #999999;
-}
-.label {
-  -webkit-border-radius: 3px;
-  -moz-border-radius: 3px;
-  border-radius: 3px;
-}
-.badge {
-  padding-left: 9px;
-  padding-right: 9px;
-  -webkit-border-radius: 9px;
-  -moz-border-radius: 9px;
-  border-radius: 9px;
-}
-.label:empty,
-.badge:empty {
-  display: none;
-}
-a.label:hover,
-a.label:focus,
-a.badge:hover,
-a.badge:focus {
-  color: #ffffff;
-  text-decoration: none;
-  cursor: pointer;
-}
-.label-important,
-.badge-important {
-  background-color: #b94a48;
-}
-.label-important[href],
-.badge-important[href] {
-  background-color: #953b39;
-}
-.label-warning,
-.badge-warning {
-  background-color: #f89406;
-}
-.label-warning[href],
-.badge-warning[href] {
-  background-color: #c67605;
-}
-.label-danger,
-.badge-danger {
-  /* XXX: backported from later bootstrap */
-  background-color: #d9534f;
-}
-.label-success,
-.badge-success {
-  background-color: #468847;
-}
-.label-success[href],
-.badge-success[href] {
-  background-color: #356635;
-}
-.label-info,
-.badge-info {
-  background-color: #3a87ad;
-}
-.label-info[href],
-.badge-info[href] {
-  background-color: #2d6987;
-}
-.label-inverse,
-.badge-inverse {
-  background-color: #333333;
-}
-.label-inverse[href],
-.badge-inverse[href] {
-  background-color: #1a1a1a;
-}
-.btn .label,
-.btn .badge {
-  position: relative;
-  top: -1px;
-}
-.btn-mini .label,
-.btn-mini .badge {
-  top: 0;
-}
-table {
-  max-width: 100%;
-  background-color: transparent;
-  border-collapse: collapse;
-  border-spacing: 0;
-}
-.table {
-  width: 100%;
-  margin-bottom: 20px;
-}
-.table th,
-.table td {
-  padding: 8px;
-  line-height: 20px;
-  text-align: left;
-  vertical-align: top;
-  border-top: 1px solid #dddddd;
-}
-.table th {
-  font-weight: bold;
-}
-.table thead th {
-  vertical-align: bottom;
-}
-.table caption + thead tr:first-child th,
-.table caption + thead tr:first-child td,
-.table colgroup + thead tr:first-child th,
-.table colgroup + thead tr:first-child td,
-.table thead:first-child tr:first-child th,
-.table thead:first-child tr:first-child td {
-  border-top: 0;
-}
-.table tbody + tbody {
-  border-top: 2px solid #dddddd;
-}
-.table .table {
-  background-color: #ffffff;
-}
-.table-condensed th,
-.table-condensed td {
-  padding: 4px 5px;
-}
-.table-bordered {
-  border: 1px solid #dddddd;
-  border-collapse: separate;
-  *border-collapse: collapse;
-  border-left: 0;
-  -webkit-border-radius: 4px;
-  -moz-border-radius: 4px;
-  border-radius: 4px;
-}
-.table-bordered th,
-.table-bordered td {
-  border-left: 1px solid #dddddd;
-}
-.table-bordered caption + thead tr:first-child th,
-.table-bordered caption + tbody tr:first-child th,
-.table-bordered caption + tbody tr:first-child td,
-.table-bordered colgroup + thead tr:first-child th,
-.table-bordered colgroup + tbody tr:first-child th,
-.table-bordered colgroup + tbody tr:first-child td,
-.table-bordered thead:first-child tr:first-child th,
-.table-bordered tbody:first-child tr:first-child th,
-.table-bordered tbody:first-child tr:first-child td {
-  border-top: 0;
-}
-.table-bordered thead:first-child tr:first-child > th:first-child,
-.table-bordered tbody:first-child tr:first-child > td:first-child,
-.table-bordered tbody:first-child tr:first-child > th:first-child {
-  -webkit-border-top-left-radius: 4px;
-  -moz-border-radius-topleft: 4px;
-  border-top-left-radius: 4px;
-}
-.table-bordered thead:first-child tr:first-child > th:last-child,
-.table-bordered tbody:first-child tr:first-child > td:last-child,
-.table-bordered tbody:first-child tr:first-child > th:last-child {
-  -webkit-border-top-right-radius: 4px;
-  -moz-border-radius-topright: 4px;
-  border-top-right-radius: 4px;
-}
-.table-bordered thead:last-child tr:last-child > th:first-child,
-.table-bordered tbody:last-child tr:last-child > td:first-child,
-.table-bordered tbody:last-child tr:last-child > th:first-child,
-.table-bordered tfoot:last-child tr:last-child > td:first-child,
-.table-bordered tfoot:last-child tr:last-child > th:first-child {
-  -webkit-border-bottom-left-radius: 4px;
-  -moz-border-radius-bottomleft: 4px;
-  border-bottom-left-radius: 4px;
-}
-.table-bordered thead:last-child tr:last-child > th:last-child,
-.table-bordered tbody:last-child tr:last-child > td:last-child,
-.table-bordered tbody:last-child tr:last-child > th:last-child,
-.table-bordered tfoot:last-child tr:last-child > td:last-child,
-.table-bordered tfoot:last-child tr:last-child > th:last-child {
-  -webkit-border-bottom-right-radius: 4px;
-  -moz-border-radius-bottomright: 4px;
-  border-bottom-right-radius: 4px;
-}
-.table-bordered tfoot + tbody:last-child tr:last-child td:first-child {
-  -webkit-border-bottom-left-radius: 0;
-  -moz-border-radius-bottomleft: 0;
-  border-bottom-left-radius: 0;
-}
-.table-bordered tfoot + tbody:last-child tr:last-child td:last-child {
-  -webkit-border-bottom-right-radius: 0;
-  -moz-border-radius-bottomright: 0;
-  border-bottom-right-radius: 0;
-}
-.table-bordered caption + thead tr:first-child th:first-child,
-.table-bordered caption + tbody tr:first-child td:first-child,
-.table-bordered colgroup + thead tr:first-child th:first-child,
-.table-bordered colgroup + tbody tr:first-child td:first-child {
-  -webkit-border-top-left-radius: 4px;
-  -moz-border-radius-topleft: 4px;
-  border-top-left-radius: 4px;
-}
-.table-bordered caption + thead tr:first-child th:last-child,
-.table-bordered caption + tbody tr:first-child td:last-child,
-.table-bordered colgroup + thead tr:first-child th:last-child,
-.table-bordered colgroup + tbody tr:first-child td:last-child {
-  -webkit-border-top-right-radius: 4px;
-  -moz-border-radius-topright: 4px;
-  border-top-right-radius: 4px;
-}
-.table-striped tbody > tr:nth-child(odd) > td,
-.table-striped tbody > tr:nth-child(odd) > th {
-  background-color: #f9f9f9;
-}
-.table-hover tbody tr:hover > td,
-.table-hover tbody tr:hover > th {
-  background-color: #f5f5f5;
-}
-table td[class*="span"],
-table th[class*="span"],
-.row-fluid table td[class*="span"],
-.row-fluid table th[class*="span"] {
-  display: table-cell;
-  float: none;
-  margin-left: 0;
-}
-.table td.span1,
-.table th.span1 {
-  float: none;
-  width: 44px;
-  margin-left: 0;
-}
-.table td.span2,
-.table th.span2 {
-  float: none;
-  width: 124px;
-  margin-left: 0;
-}
-.table td.span3,
-.table th.span3 {
-  float: none;
-  width: 204px;
-  margin-left: 0;
-}
-.table td.span4,
-.table th.span4 {
-  float: none;
-  width: 284px;
-  margin-left: 0;
-}
-.table td.span5,
-.table th.span5 {
-  float: none;
-  width: 364px;
-  margin-left: 0;
-}
-.table td.span6,
-.table th.span6 {
-  float: none;
-  width: 444px;
-  margin-left: 0;
-}
-.table td.span7,
-.table th.span7 {
-  float: none;
-  width: 524px;
-  margin-left: 0;
-}
-.table td.span8,
-.table th.span8 {
-  float: none;
-  width: 604px;
-  margin-left: 0;
-}
-.table td.span9,
-.table th.span9 {
-  float: none;
-  width: 684px;
-  margin-left: 0;
-}
-.table td.span10,
-.table th.span10 {
-  float: none;
-  width: 764px;
-  margin-left: 0;
-}
-.table td.span11,
-.table th.span11 {
-  float: none;
-  width: 844px;
-  margin-left: 0;
-}
-.table td.span12,
-.table th.span12 {
-  float: none;
-  width: 924px;
-  margin-left: 0;
-}
-.table tbody tr.success > td {
-  background-color: #dff0d8;
-}
-.table tbody tr.error > td {
-  background-color: #f2dede;
-}
-.table tbody tr.warning > td {
-  background-color: #fcf8e3;
-}
-.table tbody tr.info > td {
-  background-color: #d9edf7;
-}
-.table-hover tbody tr.success:hover > td {
-  background-color: #d0e9c6;
-}
-.table-hover tbody tr.error:hover > td {
-  background-color: #ebcccc;
-}
-.table-hover tbody tr.warning:hover > td {
-  background-color: #faf2cc;
-}
-.table-hover tbody tr.info:hover > td {
-  background-color: #c4e3f3;
-}
-form {
-  margin: 0 0 20px;
-}
-fieldset {
-  padding: 0;
-  margin: 0;
-  border: 0;
-}
-legend {
-  display: block;
-  width: 100%;
-  padding: 0;
-  margin-bottom: 20px;
-  font-size: 21px;
-  line-height: 40px;
-  color: #333333;
-  border: 0;
-  border-bottom: 1px solid #e5e5e5;
-}
-legend small {
-  font-size: 15px;
-  color: #999999;
-}
-label,
-input,
-button,
-select,
-textarea {
-  font-size: 14px;
-  font-weight: normal;
-  line-height: 20px;
-}
-input,
-button,
-select,
-textarea {
-  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
-}
-label {
-  display: block;
-  margin-bottom: 5px;
-}
-select,
-textarea,
-input[type="text"],
-input[type="password"],
-input[type="datetime"],
-input[type="datetime-local"],
-input[type="date"],
-input[type="month"],
-input[type="time"],
-input[type="week"],
-input[type="number"],
-input[type="email"],
-input[type="url"],
-input[type="search"],
-input[type="tel"],
-input[type="color"],
-.uneditable-input {
-  display: inline-block;
-  height: 20px;
-  padding: 4px 6px;
-  margin-bottom: 10px;
-  font-size: 14px;
-  line-height: 20px;
-  color: #555555;
-  -webkit-border-radius: 4px;
-  -moz-border-radius: 4px;
-  border-radius: 4px;
-  vertical-align: middle;
-}
-input,
-textarea,
-.uneditable-input {
-  width: 206px;
-}
-textarea {
-  height: auto;
-}
-textarea,
-input[type="text"],
-input[type="password"],
-input[type="datetime"],
-input[type="datetime-local"],
-input[type="date"],
-input[type="month"],
-input[type="time"],
-input[type="week"],
-input[type="number"],
-input[type="email"],
-input[type="url"],
-input[type="search"],
-input[type="tel"],
-input[type="color"],
-.uneditable-input {
-  background-color: #ffffff;
-  border: 1px solid #cccccc;
-  -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  -webkit-transition: border linear .2s, box-shadow linear .2s;
-  -moz-transition: border linear .2s, box-shadow linear .2s;
-  -o-transition: border linear .2s, box-shadow linear .2s;
-  transition: border linear .2s, box-shadow linear .2s;
-}
-textarea:focus,
-input[type="text"]:focus,
-input[type="password"]:focus,
-input[type="datetime"]:focus,
-input[type="datetime-local"]:focus,
-input[type="date"]:focus,
-input[type="month"]:focus,
-input[type="time"]:focus,
-input[type="week"]:focus,
-input[type="number"]:focus,
-input[type="email"]:focus,
-input[type="url"]:focus,
-input[type="search"]:focus,
-input[type="tel"]:focus,
-input[type="color"]:focus,
-.uneditable-input:focus {
-  border-color: rgba(82, 168, 236, 0.8);
-  outline: 0;
-  outline: thin dotted \9;
-  /* IE6-9 */
-
-  -webkit-box-shadow: inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(82,168,236,.6);
-  -moz-box-shadow: inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(82,168,236,.6);
-  box-shadow: inset 0 1px 1px rgba(0,0,0,.075), 0 0 8px rgba(82,168,236,.6);
-}
-input[type="radio"],
-input[type="checkbox"] {
-  margin: 4px 0 0;
-  *margin-top: 0;
-  /* IE7 */
-
-  margin-top: 1px \9;
-  /* IE8-9 */
-
-  line-height: normal;
-}
-input[type="file"],
-input[type="image"],
-input[type="submit"],
-input[type="reset"],
-input[type="button"],
-input[type="radio"],
-input[type="checkbox"] {
-  width: auto;
-}
-select,
-input[type="file"] {
-  height: 30px;
-  /* In IE7, the height of the select element cannot be changed by height, only font-size */
-
-  *margin-top: 4px;
-  /* For IE7, add top margin to align select with labels */
-
-  line-height: 30px;
-}
-select {
-  width: 220px;
-  border: 1px solid #cccccc;
-  background-color: #ffffff;
-}
-select[multiple],
-select[size] {
-  height: auto;
-}
-select:focus,
-input[type="file"]:focus,
-input[type="radio"]:focus,
-input[type="checkbox"]:focus {
-  outline: thin dotted #333;
-  outline: 5px auto -webkit-focus-ring-color;
-  outline-offset: -2px;
-}
-.uneditable-input,
-.uneditable-textarea {
-  color: #999999;
-  background-color: #fcfcfc;
-  border-color: #cccccc;
-  -webkit-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.025);
-  -moz-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.025);
-  box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.025);
-  cursor: not-allowed;
-}
-.uneditable-input {
-  overflow: hidden;
-  white-space: nowrap;
-}
-.uneditable-textarea {
-  width: auto;
-  height: auto;
-}
-input:-moz-placeholder,
-textarea:-moz-placeholder {
-  color: #999999;
-}
-input:-ms-input-placeholder,
-textarea:-ms-input-placeholder {
-  color: #999999;
-}
-input::-webkit-input-placeholder,
-textarea::-webkit-input-placeholder {
-  color: #999999;
-}
-.radio,
-.checkbox {
-  min-height: 20px;
-  padding-left: 20px;
-}
-.radio input[type="radio"],
-.checkbox input[type="checkbox"] {
-  float: left;
-  margin-left: -20px;
-}
-.controls > .radio:first-child,
-.controls > .checkbox:first-child {
-  padding-top: 5px;
-}
-.radio.inline,
-.checkbox.inline {
-  display: inline-block;
-  padding-top: 5px;
-  margin-bottom: 0;
-  vertical-align: middle;
-}
-.radio.inline + .radio.inline,
-.checkbox.inline + .checkbox.inline {
-  margin-left: 10px;
-}
-.input-mini {
-  width: 60px;
-}
-.input-small {
-  width: 90px;
-}
-.input-medium {
-  width: 150px;
-}
-.input-large {
-  width: 210px;
-}
-.input-xlarge {
-  width: 270px;
-}
-.input-xxlarge {
-  width: 530px;
-}
-input[class*="span"],
-select[class*="span"],
-textarea[class*="span"],
-.uneditable-input[class*="span"],
-.row-fluid input[class*="span"],
-.row-fluid select[class*="span"],
-.row-fluid textarea[class*="span"],
-.row-fluid .uneditable-input[class*="span"] {
-  float: none;
-  margin-left: 0;
-}
-.input-append input[class*="span"],
-.input-append .uneditable-input[class*="span"],
-.input-prepend input[class*="span"],
-.input-prepend .uneditable-input[class*="span"],
-.row-fluid input[class*="span"],
-.row-fluid select[class*="span"],
-.row-fluid textarea[class*="span"],
-.row-fluid .uneditable-input[class*="span"],
-.row-fluid .input-prepend [class*="span"],
-.row-fluid .input-append [class*="span"] {
-  display: inline-block;
-}
-input,
-textarea,
-.uneditable-input {
-  margin-left: 0;
-}
-.controls-row [class*="span"] + [class*="span"] {
-  margin-left: 20px;
-}
-input.span12,
-textarea.span12,
-.uneditable-input.span12 {
-  width: 926px;
-}
-input.span11,
-textarea.span11,
-.uneditable-input.span11 {
-  width: 846px;
-}
-input.span10,
-textarea.span10,
-.uneditable-input.span10 {
-  width: 766px;
-}
-input.span9,
-textarea.span9,
-.uneditable-input.span9 {
-  width: 686px;
-}
-input.span8,
-textarea.span8,
-.uneditable-input.span8 {
-  width: 606px;
-}
-input.span7,
-textarea.span7,
-.uneditable-input.span7 {
-  width: 526px;
-}
-input.span6,
-textarea.span6,
-.uneditable-input.span6 {
-  width: 446px;
-}
-input.span5,
-textarea.span5,
-.uneditable-input.span5 {
-  width: 366px;
-}
-input.span4,
-textarea.span4,
-.uneditable-input.span4 {
-  width: 286px;
-}
-input.span3,
-textarea.span3,
-.uneditable-input.span3 {
-  width: 206px;
-}
-input.span2,
-textarea.span2,
-.uneditable-input.span2 {
-  width: 126px;
-}
-input.span1,
-textarea.span1,
-.uneditable-input.span1 {
-  width: 46px;
-}
-.controls-row {
-  *zoom: 1;
-}
-.controls-row:before,
-.controls-row:after {
-  display: table;
-  content: "";
-  line-height: 0;
-}
-.controls-row:after {
-  clear: both;
-}
-.controls-row [class*="span"],
-.row-fluid .controls-row [class*="span"] {
-  float: left;
-}
-.controls-row .checkbox[class*="span"],
-.controls-row .radio[class*="span"] {
-  padding-top: 5px;
-}
-input[disabled],
-select[disabled],
-textarea[disabled],
-input[readonly],
-select[readonly],
-textarea[readonly] {
-  cursor: not-allowed;
-  background-color: #eeeeee;
-}
-input[type="radio"][disabled],
-input[type="checkbox"][disabled],
-input[type="radio"][readonly],
-input[type="checkbox"][readonly] {
-  background-color: transparent;
-}
-.control-group.warning .control-label,
-.control-group.warning .help-block,
-.control-group.warning .help-inline {
-  color: #c09853;
-}
-.control-group.warning .checkbox,
-.control-group.warning .radio,
-.control-group.warning input,
-.control-group.warning select,
-.control-group.warning textarea {
-  color: #c09853;
-}
-.control-group.warning input,
-.control-group.warning select,
-.control-group.warning textarea {
-  border-color: #c09853;
-  -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-}
-.control-group.warning input:focus,
-.control-group.warning select:focus,
-.control-group.warning textarea:focus {
-  border-color: #a47e3c;
-  -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #dbc59e;
-  -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #dbc59e;
-  box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #dbc59e;
-}
-.control-group.warning .input-prepend .add-on,
-.control-group.warning .input-append .add-on {
-  color: #c09853;
-  background-color: #fcf8e3;
-  border-color: #c09853;
-}
-.control-group.error .control-label,
-.control-group.error .help-block,
-.control-group.error .help-inline {
-  color: #b94a48;
-}
-.control-group.error .checkbox,
-.control-group.error .radio,
-.control-group.error input,
-.control-group.error select,
-.control-group.error textarea {
-  color: #b94a48;
-}
-.control-group.error input,
-.control-group.error select,
-.control-group.error textarea {
-  border-color: #b94a48;
-  -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-}
-.control-group.error input:focus,
-.control-group.error select:focus,
-.control-group.error textarea:focus {
-  border-color: #953b39;
-  -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #d59392;
-  -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #d59392;
-  box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #d59392;
-}
-.control-group.error .input-prepend .add-on,
-.control-group.error .input-append .add-on {
-  color: #b94a48;
-  background-color: #f2dede;
-  border-color: #b94a48;
-}
-.control-group.success .control-label,
-.control-group.success .help-block,
-.control-group.success .help-inline {
-  color: #468847;
-}
-.control-group.success .checkbox,
-.control-group.success .radio,
-.control-group.success input,
-.control-group.success select,
-.control-group.success textarea {
-  color: #468847;
-}
-.control-group.success input,
-.control-group.success select,
-.control-group.success textarea {
-  border-color: #468847;
-  -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-}
-.control-group.success input:focus,
-.control-group.success select:focus,
-.control-group.success textarea:focus {
-  border-color: #356635;
-  -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #7aba7b;
-  -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #7aba7b;
-  box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #7aba7b;
-}
-.control-group.success .input-prepend .add-on,
-.control-group.success .input-append .add-on {
-  color: #468847;
-  background-color: #dff0d8;
-  border-color: #468847;
-}
-.control-group.info .control-label,
-.control-group.info .help-block,
-.control-group.info .help-inline {
-  color: #3a87ad;
-}
-.control-group.info .checkbox,
-.control-group.info .radio,
-.control-group.info input,
-.control-group.info select,
-.control-group.info textarea {
-  color: #3a87ad;
-}
-.control-group.info input,
-.control-group.info select,
-.control-group.info textarea {
-  border-color: #3a87ad;
-  -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-  box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075);
-}
-.control-group.info input:focus,
-.control-group.info select:focus,
-.control-group.info textarea:focus {
-  border-color: #2d6987;
-  -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #7ab5d3;
-  -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #7ab5d3;
-  box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 6px #7ab5d3;
-}
-.control-group.info .input-prepend .add-on,
-.control-group.info .input-append .add-on {
-  color: #3a87ad;
-  background-color: #d9edf7;
-  border-color: #3a87ad;
-}
-input:focus:invalid,
-textarea:focus:invalid,
-select:focus:invalid {
-  color: #b94a48;
-  border-color: #ee5f5b;
-}
-input:focus:invalid:focus,
-textarea:focus:invalid:focus,
-select:focus:invalid:focus {
-  border-color: #e9322d;
-  -webkit-box-shadow: 0 0 6px #f8b9b7;
-  -moz-box-shadow: 0 0 6px #f8b9b7;
-  box-shadow: 0 0 6px #f8b9b7;
-}
-.form-actions {
-  padding: 19px 20px 20px;
-  margin-top: 20px;
-  margin-bottom: 20px;
-  background-color: #f5f5f5;
-  border-top: 1px solid #e5e5e5;
-  *zoom: 1;
-}
-.form-actions:before,
-.form-actions:after {
-  display: table;
-  content: "";
-  line-height: 0;
-}
-.form-actions:after {
-  clear: both;
-}
-.help-block,
-.help-inline {
-  color: #595959;
-}
-.help-block {
-  display: block;
-  margin-bottom: 10px;
-}
-.help-inline {
-  display: inline-block;
-  *display: inline;
-  /* IE7 inline-block hack */
-
-  *zoom: 1;
-  vertical-align: middle;
-  padding-left: 5px;
-}
-.input-append,
-.input-prepend {
-  display: inline-block;
-  margin-bottom: 10px;
-  vertical-align: middle;
-  font-size: 0;
-  white-space: nowrap;
-}
-.input-append input,
-.input-prepend input,
-.input-append select,
-.input-prepend select,
-.input-append .uneditable-input,
-.input-prepend .uneditable-input,
-.input-append .dropdown-menu,
-.input-prepend .dropdown-menu,
-.input-append .popover,
-.input-prepend .popover {
-  font-size: 14px;
-}
-.input-append input,
-.input-prepend input,
-.input-append select,
-.input-prepend select,
-.input-append .uneditable-input,
-.input-prepend .uneditable-input {
-  position: relative;
-  margin-bottom: 0;
-  *margin-left: 0;
-  vertical-align: top;
-  -webkit-border-radius: 0 4px 4px 0;
-  -moz-border-radius: 0 4px 4px 0;
-  border-radius: 0 4px 4px 0;
-}
-.input-append input:focus,
-.input-prepend input:focus,
-.input-append select:focus,
-.input-prepend select:focus,
-.input-append .uneditable-input:focus,
-.input-prepend .uneditable-input:focus {
-  z-index: 2;
-}
-.input-append .add-on,
-.input-prepend .add-on {
-  display: inline-block;
-  width: auto;
-  height: 20px;
-  min-width: 16px;
-  padding: 4px 5px;
-  font-size: 14px;
-  font-weight: normal;
-  line-height: 20px;
-  text-align: center;
-  text-shadow: 0 1px 0 #ffffff;
-  background-color: #eeeeee;
-  border: 1px solid #ccc;
-}
-.input-append .add-on,
-.input-prepend .add-on,
-.input-append .btn,
-.input-prepend .btn,
-.input-append .btn-group > .dropdown-toggle,
-.input-prepend .btn-group > .dropdown-toggle {
-  vertical-align: top;
-  -webkit-border-radius: 0;
-  -moz-border-radius: 0;
-  border-radius: 0;
-}
-.input-append .active,
-.input-prepend .active {
-  background-color: #a9dba9;
-  border-color: #46a546;
-}
-.input-prepend .add-on,
-.input-prepend .btn {
-  margin-right: -1px;
-}
-.input-prepend .add-on:first-child,
-.input-prepend .btn:first-child {
-  -webkit-border-radius: 4px 0 0 4px;
-  -moz-border-radius: 4px 0 0 4px;
-  border-radius: 4px 0 0 4px;
-}
-.input-append input,
-.input-append select,
-.input-append .uneditable-input {
-  -webkit-border-radius: 4px 0 0 4px;
-  -moz-border-radius: 4px 0 0 4px;
-  border-radius: 4px 0 0 4px;
-}
-.input-append input + .btn-group .btn:last-child,
-.input-append select + .btn-group .btn:last-child,
-.input-append .uneditable-input + .btn-group .btn:last-child {
-  -webkit-border-radius: 0 4px 4px 0;
-  -moz-border-radius: 0 4px 4px 0;
-  border-radius: 0 4px 4px 0;
-}
-.input-append .add-on,
-.input-append .btn,
-.input-append .btn-group {
-  margin-left: -1px;
-}
-.input-append .add-on:last-child,
-.input-append .btn:last-child,
-.input-append .btn-group:last-child > .dropdown-toggle {
-  -webkit-border-radius: 0 4px 4px 0;
-  -moz-border-radius: 0 4px 4px 0;
-  border-radius: 0 4px 4px 0;
-}
-.input-prepend.input-append input,
-.input-prepend.input-append select,
-.input-prepend.input-append .uneditable-input {
-  -webkit-border-radius: 0;
-  -moz-border-radius: 0;
-  border-radius: 0;
-}
-.input-prepend.input-append input + .btn-group .btn,
-.input-prepend.input-append select + .btn-group .btn,
-.input-prepend.input-append .uneditable-input + .btn-group .btn {
-  -webkit-border-radius: 0 4px 4px 0;
-  -moz-border-radius: 0 4px 4px 0;
-  border-radius: 0 4px 4px 0;
-}
-.input-prepend.input-append .add-on:first-child,
-.input-prepend.input-append .btn:first-child {
-  margin-right: -1px;
-  -webkit-border-radius: 4px 0 0 4px;
-  -moz-border-radius: 4px 0 0 4px;
-  border-radius: 4px 0 0 4px;
-}
-.input-prepend.input-append .add-on:last-child,
-.input-prepend.input-append .btn:last-child {
-  margin-left: -1px;
-  -webkit-border-radius: 0 4px 4px 0;
-  -moz-border-radius: 0 4px 4px 0;
-  border-radius: 0 4px 4px 0;
-}
-.input-prepend.input-append .btn-group:first-child {
-  margin-left: 0;
-}
-input.search-query {
-  padding-right: 14px;
-  padding-right: 4px \9;
-  padding-left: 14px;
-  padding-left: 4px \9;
-  /* IE7-8 doesn't have border-radius, so don't indent the padding */
-
-  margin-bottom: 0;
-  -webkit-border-radius: 15px;
-  -moz-border-radius: 15px;
-  border-radius: 15px;
-}
-/* Allow for input prepend/append in search forms */
-.form-search .input-append .search-query,
-.form-search .input-prepend .search-query {
-  -webkit-border-radius: 0;
-  -moz-border-radius: 0;
-  border-radius: 0;
-}
-.form-search .input-append .search-query {
-  -webkit-border-radius: 14px 0 0 14px;
-  -moz-border-radius: 14px 0 0 14px;
-  border-radius: 14px 0 0 14px;
-}
-.form-search .input-append .btn {
-  -webkit-border-radius: 0 14px 14px 0;
-  -moz-border-radius: 0 14px 14px 0;
-  border-radius: 0 14px 14px 0;
-}
-.form-search .input-prepend .search-query {
-  -webkit-border-radius: 0 14px 14px 0;
-  -moz-border-radius: 0 14px 14px 0;
-  border-radius: 0 14px 14px 0;
-}
-.form-search .input-prepend .btn {
-  -webkit-border-radius: 14px 0 0 14px;
-  -moz-border-radius: 14px 0 0 14px;
-  border-radius: 14px 0 0 14px;
-}
-.form-search input,
-.form-inline input,
-.form-horizontal input,
-.form-search textarea,
-.form-inline textarea,
-.form-horizontal textarea,
-.form-search select,
-.form-inline select,
-.form-horizontal select,
-.form-search .help-inline,
-.form-inline .help-inline,
-.form-horizontal .help-inline,
-.form-search .uneditable-input,
-.form-inline .uneditable-input,
-.form-horizontal .uneditable-input,
-.form-search .input-prepend,
-.form-inline .input-prepend,
-.form-horizontal .input-prepend,
-.form-search .input-append,
-.form-inline .input-append,
-.form-horizontal .input-append {
-  display: inline-block;
-  *display: inline;
-  /* IE7 inline-block hack */
-
-  *zoom: 1;
-  margin-bottom: 0;
-  vertical-align: middle;
-}
-.form-search .hide,
-.form-inline .hide,
-.form-horizontal .hide {
-  display: none;
-}
-.form-search label,
-.form-inline label,
-.form-search .btn-group,
-.form-inline .btn-group {
-  display: inline-block;
-}
-.form-search .input-append,
-.form-inline .input-append,
-.form-search .input-prepend,
-.form-inline .input-prepend {
-  margin-bottom: 0;
-}
-.form-search .radio,
-.form-search .checkbox,
-.form-inline .radio,
-.form-inline .checkbox {
-  padding-left: 0;
-  margin-bottom: 0;
-  vertical-align: middle;
-}
-.form-search .radio input[type="radio"],
-.form-search .checkbox input[type="checkbox"],
-.form-inline .radio input[type="radio"],
-.form-inline .checkbox input[type="checkbox"] {
-  float: left;
-  margin-right: 3px;
-  margin-left: 0;
-}
-.control-group {
-  margin-bottom: 10px;
-}
-legend + .control-group {
-  margin-top: 20px;
-  -webkit-margin-top-collapse: separate;
-}
-.form-horizontal .control-group {
-  margin-bottom: 20px;
-  *zoom: 1;
-}
-.form-horizontal .control-group:before,
-.form-horizontal .control-group:after {
-  display: table;
-  content: "";
-  line-height: 0;
-}
-.form-horizontal .control-group:after {
-  clear: both;
-}
-.form-horizontal .control-label {
-  float: left;
-  width: 160px;
-  padding-top: 5px;
-  text-align: right;
-}
-.form-horizontal .controls {
-  *display: inline-block;
-  *padding-left: 20px;
-  margin-left: 180px;
-  *margin-left: 0;
-}
-.form-horizontal .controls:first-child {
-  *padding-left: 180px;
-}
-.form-horizontal .help-block {
-  margin-bottom: 0;
-}
-.form-horizontal input + .help-block,
-.form-horizontal select + .help-block,
-.form-horizontal textarea + .help-block,
-.form-horizontal .uneditable-input + .help-block,
-.form-horizontal .input-prepend + .help-block,
-.form-horizontal .input-append + .help-block {
-  margin-top: 10px;
-}
-.form-horizontal .form-actions {
-  padding-left: 180px;
-}
-.btn {
-  display: inline-block;
-  *display: inline;
-  /* IE7 inline-block hack */
-
-  *zoom: 1;
-  padding: 4px 12px;
-  margin-bottom: 0;
-  font-size: 14px;
-  line-height: 20px;
-  text-align: center;
-  vertical-align: middle;
-  cursor: pointer;
-  color: #333333;
-  text-shadow: 0 1px 1px rgba(255, 255, 255, 0.75);
-  background-color: #f5f5f5;
-  background-image: -moz-linear-gradient(top, #ffffff, #e6e6e6);
-  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#ffffff), to(#e6e6e6));
-  background-image: -webkit-linear-gradient(top, #ffffff, #e6e6e6);
-  background-image: -o-linear-gradient(top, #ffffff, #e6e6e6);
-  background-image: linear-gradient(to bottom, #ffffff, #e6e6e6);
-  background-repeat: repeat-x;
-  filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff', endColorstr='#ffe6e6e6', GradientType=0);
-  border-color: #e6e6e6 #e6e6e6 #bfbfbf;
-  border-color: rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);
-  *background-color: #e6e6e6;
-  /* Darken IE7 buttons by default so they stand out more given they won't have borders */
-
-  filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
-  border: 1px solid #cccccc;
-  *border: 0;
-  border-bottom-color: #b3b3b3;
-  -webkit-border-radius: 4px;
-  -moz-border-radius: 4px;
-  border-radius: 4px;
-  *margin-left: .3em;
-  -webkit-box-shadow: inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);
-  -moz-box-shadow: inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);
-  box-shadow: inset 0 1px 0 rgba(255,255,255,.2), 0 1px 2px rgba(0,0,0,.05);
-}
-.btn:hover,
-.btn:focus,
-.btn:active,
-.btn.active,
-.btn.disabled,
-.btn[disabled] {
-  color: #333333;
-  background-color: #e6e6e6;
-  *background-color: #d9d9d9;
-}
-.btn:active,
-.btn.active {
-  background-color: #cccccc \9;
-}
-.btn:first-child {
-  *margin-left: 0;
-}
-.btn:hover,
-.btn:focus {
-  color: #333333;
-  text-decoration: none;
-  background-position: 0 -15px;
-  -webkit-transition: background-position 0.1s linear;
-  -moz-transition: background-position 0.1s linear;
-  -o-transition: background-position 0.1s linear;
-  transition: background-position 0.1s linear;
-}
-.btn:focus {
-  outline: thin dotted #333;
-  outline: 5px auto -webkit-focus-ring-color;
-  outline-offset: -2px;
-}
-.btn.active,
-.btn:active {
-  background-image: none;
-  outline: 0;
-  -webkit-box-shadow: inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);
-  -moz-box-shadow: inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);
-  box-shadow: inset 0 2px 4px rgba(0,0,0,.15), 0 1px 2px rgba(0,0,0,.05);
-}
-.btn.disabled,
-.btn[disabled] {
-  cursor: default;
-  background-image: none;
-  opacity: 0.65;
-  filter: alpha(opacity=65);
-  -webkit-box-shadow: none;
-  -moz-box-shadow: none;
-  box-shadow: none;
-}
-.btn-large {
-  padding: 11px 19px;
-  font-size: 17.5px;
-  -webkit-border-radius: 6px;
-  -moz-border-radius: 6px;
-  border-radius: 6px;
-}
-.btn-large [class^="icon-"],
-.btn-large [class*=" icon-"] {
-  margin-top: 4px;
-}
-.btn-small {
-  padding: 2px 10px;
-  font-size: 11.9px;
-  -webkit-border-radius: 3px;
-  -moz-border-radius: 3px;
-  border-radius: 3px;
-}
-.btn-small [class^="icon-"],
-.btn-small [class*=" icon-"] {
-  margin-top: 0;
-}
-.btn-mini [class^="icon-"],
-.btn-mini [class*=" icon-"] {
-  margin-top: -1px;
-}
-.btn-mini {
-  padding: 0 6px;
-  font-size: 10.5px;
-  -webkit-border-radius: 3px;
-  -moz-border-radius: 3px;
-  border-radius: 3px;
-}
-.btn-block {
-  display: block;
-  width: 100%;
-  padding-left: 0;
-  padding-right: 0;
-  -webkit-box-sizing: border-box;
-  -moz-box-sizing: border-box;
-  box-sizing: border-box;
-}
-.btn-block + .btn-block {
-  margin-top: 5px;
-}
-input[type="submit"].btn-block,
-input[type="reset"].btn-block,
-input[type="button"].btn-block {
-  width: 100%;
-}
-.btn-primary.active,
-.btn-warning.active,
-.btn-danger.active,
-.btn-success.active,
-.btn-info.active,
-.btn-inverse.active {
-  color: rgba(255, 255, 255, 0.75);
-}
-.btn-primary {
-  color: #ffffff;
-  text-shadow: 0 -1px 0 rgba(0, 0, 0, 0.25);
-  background-color: #006dcc;
-  background-image: -moz-linear-gradient(top, #0088cc, #0044cc);
-  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#0088cc), to(#0044cc));
-  background-image: -webkit-linear-gradient(top, #0088cc, #0044cc);
-  background-image: -o-linear-gradient(top, #0088cc, #0044cc);
-  background-image: linear-gradient(to bottom, #0088cc, #0044cc);
-  background-repeat: repeat-x;
-  filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff0088cc', endColorstr='#ff0044cc', GradientType=0);
-  border-color: #0044cc #0044cc #002a80;
-  border-color: rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);
-  *background-color: #0044cc;
-  /* Darken IE7 buttons by default so they stand out more given they won't have borders */
-
-  filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
-}
-.btn-primary:hover,
-.btn-primary:focus,
-.btn-primary:active,
-.btn-primary.active,
-.btn-primary.disabled,
-.btn-primary[disabled] {
-  color: #ffffff;
-  background-color: #0044cc;
-  *background-color: #003bb3;
-}
-.btn-primary:active,
-.btn-primary.active {
-  background-color: #003399 \9;
-}
-.btn-warning {
-  color: #ffffff;
-  text-shadow: 0 -1px 0 rgba(0, 0, 0, 0.25);
-  background-color: #faa732;
-  background-image: -moz-linear-gradient(top, #fbb450, #f89406);
-  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#fbb450), to(#f89406));
-  background-image: -webkit-linear-gradient(top, #fbb450, #f89406);
-  background-image: -o-linear-gradient(top, #fbb450, #f89406);
-  background-image: linear-gradient(to bottom, #fbb450, #f89406);
-  background-repeat: repeat-x;
-  filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffbb450', endColorstr='#fff89406', GradientType=0);
-  border-color: #f89406 #f89406 #ad6704;
-  border-color: rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);
-  *background-color: #f89406;
-  /* Darken IE7 buttons by default so they stand out more given they won't have borders */
-
-  filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
-}
-.btn-warning:hover,
-.btn-warning:focus,
-.btn-warning:active,
-.btn-warning.active,
-.btn-warning.disabled,
-.btn-warning[disabled] {
-  color: #ffffff;
-  background-color: #f89406;
-  *background-color: #df8505;
-}
-.btn-warning:active,
-.btn-warning.active {
-  background-color: #c67605 \9;
-}
-.btn-danger {
-  color: #ffffff;
-  text-shadow: 0 -1px 0 rgba(0, 0, 0, 0.25);
-  background-color: #da4f49;
-  background-image: -moz-linear-gradient(top, #ee5f5b, #bd362f);
-  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#ee5f5b), to(#bd362f));
-  background-image: -webkit-linear-gradient(top, #ee5f5b, #bd362f);
-  background-image: -o-linear-gradient(top, #ee5f5b, #bd362f);
-  background-image: linear-gradient(to bottom, #ee5f5b, #bd362f);
-  background-repeat: repeat-x;
-  filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffee5f5b', endColorstr='#ffbd362f', GradientType=0);
-  border-color: #bd362f #bd362f #802420;
-  border-color: rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);
-  *background-color: #bd362f;
-  /* Darken IE7 buttons by default so they stand out more given they won't have borders */
-
-  filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
-}
-.btn-danger:hover,
-.btn-danger:focus,
-.btn-danger:active,
-.btn-danger.active,
-.btn-danger.disabled,
-.btn-danger[disabled] {
-  color: #ffffff;
-  background-color: #bd362f;
-  *background-color: #a9302a;
-}
-.btn-danger:active,
-.btn-danger.active {
-  background-color: #942a25 \9;
-}
-.btn-success {
-  color: #ffffff;
-  text-shadow: 0 -1px 0 rgba(0, 0, 0, 0.25);
-  background-color: #5bb75b;
-  background-image: -moz-linear-gradient(top, #62c462, #51a351);
-  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#62c462), to(#51a351));
-  background-image: -webkit-linear-gradient(top, #62c462, #51a351);
-  background-image: -o-linear-gradient(top, #62c462, #51a351);
-  background-image: linear-gradient(to bottom, #62c462, #51a351);
-  background-repeat: repeat-x;
-  filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff62c462', endColorstr='#ff51a351', GradientType=0);
-  border-color: #51a351 #51a351 #387038;
-  border-color: rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);
-  *background-color: #51a351;
-  /* Darken IE7 buttons by default so they stand out more given they won't have borders */
-
-  filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
-}
-.btn-success:hover,
-.btn-success:focus,
-.btn-success:active,
-.btn-success.active,
-.btn-success.disabled,
-.btn-success[disabled] {
-  color: #ffffff;
-  background-color: #51a351;
-  *background-color: #499249;
-}
-.btn-success:active,
-.btn-success.active {
-  background-color: #408140 \9;
-}
-.btn-info {
-  color: #ffffff;
-  text-shadow: 0 -1px 0 rgba(0, 0, 0, 0.25);
-  background-color: #49afcd;
-  background-image: -moz-linear-gradient(top, #5bc0de, #2f96b4);
-  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#5bc0de), to(#2f96b4));
-  background-image: -webkit-linear-gradient(top, #5bc0de, #2f96b4);
-  background-image: -o-linear-gradient(top, #5bc0de, #2f96b4);
-  background-image: linear-gradient(to bottom, #5bc0de, #2f96b4);
-  background-repeat: repeat-x;
-  filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de', endColorstr='#ff2f96b4', GradientType=0);
-  border-color: #2f96b4 #2f96b4 #1f6377;
-  border-color: rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);
-  *background-color: #2f96b4;
-  /* Darken IE7 buttons by default so they stand out more given they won't have borders */
-
-  filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
-}
-.btn-info:hover,
-.btn-info:focus,
-.btn-info:active,
-.btn-info.active,
-.btn-info.disabled,
-.btn-info[disabled] {
-  color: #ffffff;
-  background-color: #2f96b4;
-  *background-color: #2a85a0;
-}
-.btn-info:active,
-.btn-info.active {
-  background-color: #24748c \9;
-}
-.btn-inverse {
-  color: #ffffff;
-  text-shadow: 0 -1px 0 rgba(0, 0, 0, 0.25);
-  background-color: #363636;
-  background-image: -moz-linear-gradient(top, #444444, #222222);
-  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#444444), to(#222222));
-  background-image: -webkit-linear-gradient(top, #444444, #222222);
-  background-image: -o-linear-gradient(top, #444444, #222222);
-  background-image: linear-gradient(to bottom, #444444, #222222);
-  background-repeat: repeat-x;
-  filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff444444', endColorstr='#ff222222', GradientType=0);
-  border-color: #222222 #222222 #000000;
-  border-color: rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25);
-  *background-color: #222222;
-  /* Darken IE7 buttons by default so they stand out more given they won't have borders */
-
-  filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
-}
-.btn-inverse:hover,
-.btn-inverse:focus,
-.btn-inverse:active,
-.btn-inverse.active,
-.btn-inverse.disabled,
-.btn-inverse[disabled] {
-  color: #ffffff;
-  background-color: #222222;
-  *background-color: #151515;
-}
-.btn-inverse:active,
-.btn-inverse.active {
-  background-color: #080808 \9;
-}
-button.btn,
-input[type="submit"].btn {
-  *padding-top: 3px;
-  *padding-bottom: 3px;
-}
-button.btn::-moz-focus-inner,
-input[type="submit"].btn::-moz-focus-inner {
-  padding: 0;
-  border: 0;
-}
-button.btn.btn-large,
-input[type="submit"].btn.btn-large {
-  *padding-top: 7px;
-  *padding-bottom: 7px;
-}
-button.btn.btn-small,
-input[type="submit"].btn.btn-small {
-  *padding-top: 3px;
-  *padding-bottom: 3px;
-}
-button.btn.btn-mini,
-input[type="submit"].btn.btn-mini {
-  *padding-top: 1px;
-  *padding-bottom: 1px;
-}
-.btn-link,
-.btn-link:active,
-.btn-link[disabled] {
-  background-color: transparent;
-  background-image: none;
-  -webkit-box-shadow: none;
-  -moz-box-shadow: none;
-  box-shadow: none;
-}
-.btn-link {
-  border-color: transparent;
-  cursor: pointer;
-  color: #0088cc;
-  -webkit-border-radius: 0;
-  -moz-border-radius: 0;
-  border-radius: 0;
-}
-.btn-link:hover,
-.btn-link:focus {
-  color: #005580;
-  text-decoration: underline;
-  background-color: transparent;
-}
-.btn-link[disabled]:hover,
-.btn-link[disabled]:focus {
-  color: #333333;
-  text-decoration: none;
-}
-[class^="icon-"],
-[class*=" icon-"] {
-  display: inline-block;
-  width: 14px;
-  height: 14px;
-  *margin-right: .3em;
-  line-height: 14px;
-  vertical-align: text-top;
-  background-image: url("../img/glyphicons-halflings.png");
-  background-position: 14px 14px;
-  background-repeat: no-repeat;
-  margin-top: 1px;
-}
-/* White icons with optional class, or on hover/focus/active states of certain elements */
-.icon-white,
-.nav-pills > .active > a > [class^="icon-"],
-.nav-pills > .active > a > [class*=" icon-"],
-.nav-list > .active > a > [class^="icon-"],
-.nav-list > .active > a > [class*=" icon-"],
-.navbar-inverse .nav > .active > a > [class^="icon-"],
-.navbar-inverse .nav > .active > a > [class*=" icon-"],
-.dropdown-menu > li > a:hover > [class^="icon-"],
-.dropdown-menu > li > a:focus > [class^="icon-"],
-.dropdown-menu > li > a:hover > [class*=" icon-"],
-.dropdown-menu > li > a:focus > [class*=" icon-"],
-.dropdown-menu > .active > a > [class^="icon-"],
-.dropdown-menu > .active > a > [class*=" icon-"],
-.dropdown-submenu:hover > a > [class^="icon-"],
-.dropdown-submenu:focus > a > [class^="icon-"],
-.dropdown-submenu:hover > a > [class*=" icon-"],
-.dropdown-submenu:focus > a > [class*=" icon-"] {
-  background-image: url("../img/glyphicons-halflings-white.png");
-}
-.icon-glass {
-  background-position: 0      0;
-}
-.icon-music {
-  background-position: -24px 0;
-}
-.icon-search {
-  background-position: -48px 0;
-}
-.icon-envelope {
-  background-position: -72px 0;
-}
-.icon-heart {
-  background-position: -96px 0;
-}
-.icon-star {
-  background-position: -120px 0;
-}
-.icon-star-empty {
-  background-position: -144px 0;
-}
-.icon-user {
-  background-position: -168px 0;
-}
-.icon-film {
-  background-position: -192px 0;
-}
-.icon-th-large {
-  background-position: -216px 0;
-}
-.icon-th {
-  background-position: -240px 0;
-}
-.icon-th-list {
-  background-position: -264px 0;
-}
-.icon-ok {
-  background-position: -288px 0;
-}
-.icon-remove {
-  background-position: -312px 0;
-}
-.icon-zoom-in {
-  background-position: -336px 0;
-}
-.icon-zoom-out {
-  background-position: -360px 0;
-}
-.icon-off {
-  background-position: -384px 0;
-}
-.icon-signal {
-  background-position: -408px 0;
-}
-.icon-cog {
-  background-position: -432px 0;
-}
-.icon-trash {
-  background-position: -456px 0;
-}
-.icon-home {
-  background-position: 0 -24px;
-}
-.icon-file {
-  background-position: -24px -24px;
-}
-.icon-time {
-  background-position: -48px -24px;
-}
-.icon-road {
-  background-position: -72px -24px;
-}
-.icon-download-alt {
-  background-position: -96px -24px;
-}
-.icon-download {
-  background-position: -120px -24px;
-}
-.icon-upload {
-  background-position: -144px -24px;
-}
-.icon-inbox {
-  background-position: -168px -24px;
-}
-.icon-play-circle {
-  background-position: -192px -24px;
-}
-.icon-repeat {
-  background-position: -216px -24px;
-}
-.icon-refresh {
-  background-position: -240px -24px;
-}
-.icon-list-alt {
-  background-position: -264px -24px;
-}
-.icon-lock {
-  background-position: -287px -24px;
-}
-.icon-flag {
-  background-position: -312px -24px;
-}
-.icon-headphones {
-  background-position: -336px -24px;
-}
-.icon-volume-off {
-  background-position: -360px -24px;
-}
-.icon-volume-down {
-  background-position: -384px -24px;
-}
-.icon-volume-up {
-  background-position: -408px -24px;
-}
-.icon-qrcode {
-  background-position: -432px -24px;
-}
-.icon-barcode {
-  background-position: -456px -24px;
-}
-.icon-tag {
-  background-position: 0 -48px;
-}
-.icon-tags {
-  background-position: -25px -48px;
-}
-.icon-book {
-  background-position: -48px -48px;
-}
-.icon-bookmark {
-  background-position: -72px -48px;
-}
-.icon-print {
-  background-position: -96px -48px;
-}
-.icon-camera {
-  background-position: -120px -48px;
-}
-.icon-font {
-  background-position: -144px -48px;
-}
-.icon-bold {
-  background-position: -167px -48px;
-}
-.icon-italic {
-  background-position: -192px -48px;
-}
-.icon-text-height {
-  background-position: -216px -48px;
-}
-.icon-text-width {
-  background-position: -240px -48px;
-}
-.icon-align-left {
-  background-position: -264px -48px;
-}
-.icon-align-center {
-  background-position: -288px -48px;
-}
-.icon-align-right {
-  background-position: -312px -48px;
-}
-.icon-align-justify {
-  background-position: -336px -48px;
-}
-.icon-list {
-  background-position: -360px -48px;
-}
-.icon-indent-left {
-  background-position: -384px -48px;
-}
-.icon-indent-right {
-  background-position: -408px -48px;
-}
-.icon-facetime-video {
-  background-position: -432px -48px;
-}
-.icon-picture {
-  background-position: -456px -48px;
-}
-.icon-pencil {
-  background-position: 0 -72px;
-}
-.icon-map-marker {
-  background-position: -24px -72px;
-}
-.icon-adjust {
-  background-position: -48px -72px;
-}
-.icon-tint {
-  background-position: -72px -72px;
-}
-.icon-edit {
-  background-position: -96px -72px;
-}
-.icon-share {
-  background-position: -120px -72px;
-}
-.icon-check {
-  background-position: -144px -72px;
-}
-.icon-move {
-  background-position: -168px -72px;
-}
-.icon-step-backward {
-  background-position: -192px -72px;
-}
-.icon-fast-backward {
-  background-position: -216px -72px;
-}
-.icon-backward {
-  background-position: -240px -72px;
-}
-.icon-play {
-  background-position: -264px -72px;
-}
-.icon-pause {
-  background-position: -288px -72px;
-}
-.icon-stop {
-  background-position: -312px -72px;
-}
-.icon-forward {
-  background-position: -336px -72px;
-}
-.icon-fast-forward {
-  background-position: -360px -72px;
-}
-.icon-step-forward {
-  background-position: -384px -72px;
-}
-.icon-eject {
-  background-position: -408px -72px;
-}
-.icon-chevron-left {
-  background-position: -432px -72px;
-}
-.icon-chevron-right {
-  background-position: -456px -72px;
-}
-.icon-plus-sign {
-  background-position: 0 -96px;
-}
-.icon-minus-sign {
-  background-position: -24px -96px;
-}
-.icon-remove-sign {
-  background-position: -48px -96px;
-}
-.icon-ok-sign {
-  background-position: -72px -96px;
-}
-.icon-question-sign {
-  background-position: -96px -96px;
-}
-.icon-info-sign {
-  background-position: -120px -96px;
-}
-.icon-screenshot {
-  background-position: -144px -96px;
-}
-.icon-remove-circle {
-  background-position: -168px -96px;
-}
-.icon-ok-circle {
-  background-position: -192px -96px;
-}
-.icon-ban-circle {
-  background-position: -216px -96px;
-}
-.icon-arrow-left {
-  background-position: -240px -96px;
-}
-.icon-arrow-right {
-  background-position: -264px -96px;
-}
-.icon-arrow-up {
-  background-position: -289px -96px;
-}
-.icon-arrow-down {
-  background-position: -312px -96px;
-}
-.icon-share-alt {
-  background-position: -336px -96px;
-}
-.icon-resize-full {
-  background-position: -360px -96px;
-}
-.icon-resize-small {
-  background-position: -384px -96px;
-}
-.icon-plus {
-  background-position: -408px -96px;
-}
-.icon-minus {
-  background-position: -433px -96px;
-}
-.icon-asterisk {
-  background-position: -456px -96px;
-}
-.icon-exclamation-sign {
-  background-position: 0 -120px;
-}
-.icon-gift {
-  background-position: -24px -120px;
-}
-.icon-leaf {
-  background-position: -48px -120px;
-}
-.icon-fire {
-  background-position: -72px -120px;
-}
-.icon-eye-open {
-  background-position: -96px -120px;
-}
-.icon-eye-close {
-  background-position: -120px -120px;
-}
-.icon-warning-sign {
-  background-position: -144px -120px;
-}
-.icon-plane {
-  background-position: -168px -120px;
-}
-.icon-calendar {
-  background-position: -192px -120px;
-}
-.icon-random {
-  background-position: -216px -120px;
-  width: 16px;
-}
-.icon-comment {
-  background-position: -240px -120px;
-}
-.icon-magnet {
-  background-position: -264px -120px;
-}
-.icon-chevron-up {
-  background-position: -288px -120px;
-}
-.icon-chevron-down {
-  background-position: -313px -119px;
-}
-.icon-retweet {
-  background-position: -336px -120px;
-}
-.icon-shopping-cart {
-  background-position: -360px -120px;
-}
-.icon-folder-close {
-  background-position: -384px -120px;
-  width: 16px;
-}
-.icon-folder-open {
-  background-position: -408px -120px;
-  width: 16px;
-}
-.icon-resize-vertical {
-  background-position: -432px -119px;
-}
-.icon-resize-horizontal {
-  background-position: -456px -118px;
-}
-.icon-hdd {
-  background-position: 0 -144px;
-}
-.icon-bullhorn {
-  background-position: -24px -144px;
-}
-.icon-bell {
-  background-position: -48px -144px;
-}
-.icon-certificate {
-  background-position: -72px -144px;
-}
-.icon-thumbs-up {
-  background-position: -96px -144px;
-}
-.icon-thumbs-down {
-  background-position: -120px -144px;
-}
-.icon-hand-right {
-  background-position: -144px -144px;
-}
-.icon-hand-left {
-  background-position: -168px -144px;
-}
-.icon-hand-up {
-  background-position: -192px -144px;
-}
-.icon-hand-down {
-  background-position: -216px -144px;
-}
-.icon-circle-arrow-right {
-  background-position: -240px -144px;
-}
-.icon-circle-arrow-left {
-  background-position: -264px -144px;
-}
-.icon-circle-arrow-up {
-  background-position: -288px -144px;
-}
-.icon-circle-arrow-down {
-  background-position: -312px -144px;
-}
-.icon-globe {
-  background-position: -336px -144px;
-}
-.icon-wrench {
-  background-position: -360px -144px;
-}
-.icon-tasks {
-  background-position: -384px -144px;
-}
-.icon-filter {
-  background-position: -408px -144px;
-}
-.icon-briefcase {
-  background-position: -432px -144px;
-}
-.icon-fullscreen {
-  background-position: -456px -144px;
-}
-.btn-group {
-  position: relative;
-  display: inline-block;
-  *display: inline;
-  /* IE7 inline-block hack */
-
-  *zoom: 1;
-  font-size: 0;
-  vertical-align: middle;
-  white-space: nowrap;
-  *margin-left: .3em;
-}
-.btn-group:first-child {
-  *margin-left: 0;
-}
-.btn-group + .btn-group {
-  margin-left: 5px;
-}
-.btn-toolbar {
-  font-size: 0;
-  margin-top: 10px;
-  margin-bottom: 10px;
-}
-.btn-toolbar > .btn + .btn,
-.btn-toolbar > .btn-group + .btn,
-.btn-toolbar > .btn + .btn-group {
-  margin-left: 5px;
-}
-.btn-group > .btn {
-  position: relative;
-  -webkit-border-radius: 0;
-  -moz-border-radius: 0;
-  border-radius: 0;
-}
-.btn-group > .btn + .btn {
-  margin-left: -1px;
-}
-.btn-group > .btn,
-.btn-group > .dropdown-menu,
-.btn-group