diff -pruN 3.4.2-7/.appveyor.yml 4.0.1-1/.appveyor.yml
--- 3.4.2-7/.appveyor.yml	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/.appveyor.yml	1970-01-01 00:00:00.000000000 +0000
@@ -1,258 +0,0 @@
-clone_depth: 10
-
-environment:
-  BOOST_PROG_OPTION : "C:/projects/mlpack/\
-  boost_program_options-vc140.1.60.0.0/lib/native/address-model-64/lib/*.*"
-  BOOST_MATH : "C:/projects/mlpack/\
-  boost_math_c99-vc140.1.60.0.0/lib/native/address-model-64/lib/*.*"
-  BOOST_RANDOM : "C:/projects/mlpack/\
-  boost_random-vc140.1.60.0.0/lib/native/address-model-64/lib/*.*"
-  BOOST_SERIALIZATION : "C:/projects/mlpack/\
-  boost_serialization-vc140.1.60.0.0/lib/native/address-model-64/lib/*.*"
-  BOOST_UNIT_TEST : "C:/projects/mlpack/\
-  boost_unit_test_framework-vc140.1.60.0.0/lib/native/address-model-64/lib/*.*"
-  ARMADILLO_DOWNLOAD : "https://data.kurg.org/armadillo-8.400.0.tar.xz"
-  ARMADILLO_LIBRARY : "C:/projects/mlpack/armadillo-8.400.0/\
-  build/Debug/armadillo.lib"
-  BLAS_LIBRARY : "%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/\
-  libopenblas.dll.a"
-  BOOST_INCLUDE : "C:/projects/mlpack/boost.1.60.0.0/lib/native/include"
-  JENKINS_DOC_DOWNLOAD : "http://ci.mlpack.org/job/mlpack%20-%20doxygen%20\
-  build/lastSuccessfulBuild/artifact/build/doc/html/*zip*/html.zip"
-  JENKINS_DOC : "C:/projects/mlpack/dist/win-installer/jenkinsdoc.zip"
-  GIT_VERSION_FILE : "C:/projects/mlpack/src/mlpack/core/util/gitversion.hpp"
-  matrix:
-    - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
-      VSVER: Visual Studio 14 2015 Win64
-      MSBUILD: C:\Program Files (x86)\MSBuild\14.0\bin\MSBuild.exe
-    - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
-      VSVER: Visual Studio 15 2017 Win64
-      MSBUILD: C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe
-# Currently, the VS2019 build seems to always time out.  This seems to be an
-# AppVeyor issue.
-#    - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
-#      VSVER: Visual Studio 16 2019
-#      MSBUILD: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\MSBuild\Current\Bin\MSBuild.exe
-
-
-configuration: Release
-
-os: Visual Studio 2015
-
-install:
-  - ps: nuget install boost -o "${env:APPVEYOR_BUILD_FOLDER}" -Version 1.60.0
-  - ps: >
-      nuget install boost_unit_test_framework-vc140
-      -o "${env:APPVEYOR_BUILD_FOLDER}" -Version 1.60.0
-  - ps: >
-      nuget install boost_program_options-vc140
-      -o "${env:APPVEYOR_BUILD_FOLDER}" -Version 1.60.0
-  - ps: >
-      nuget install boost_random-vc140
-      -o "${env:APPVEYOR_BUILD_FOLDER}" -Version 1.60.0
-  - ps: >
-      nuget install boost_serialization-vc140
-      -o "${env:APPVEYOR_BUILD_FOLDER}" -Version 1.60.0
-  - ps: >
-      nuget install boost_math_c99-vc140
-      -o "${env:APPVEYOR_BUILD_FOLDER}" -Version 1.60.0
-  - ps: nuget install OpenBLAS -o "${env:APPVEYOR_BUILD_FOLDER}"
-  - set path=C:\Program Files (x86)\WiX Toolset v3.11\bin;%path%
-
-build_script:
-  - mkdir boost_libs
-  - ps: cp ${env:BOOST_PROG_OPTION} C:\projects\mlpack\boost_libs\
-  - ps: cp ${env:BOOST_MATH} C:\projects\mlpack\boost_libs\
-  - ps: cp ${env:BOOST_RANDOM} C:\projects\mlpack\boost_libs\
-  - ps: cp ${env:BOOST_SERIALIZATION} C:\projects\mlpack\boost_libs\
-  - ps: cp ${env:BOOST_UNIT_TEST} C:\projects\mlpack\boost_libs\
-  - echo TEST_ARMA is %ARMADILLO_DOWNLOAD%
-  - >
-    appveyor DownloadFile %ARMADILLO_DOWNLOAD%
-    -FileName armadillo.tar.xz
-  - 7z x armadillo.tar.xz -so -txz | 7z x -si -ttar > nul
-  - cd armadillo-8.400.0 && mkdir build && cd build
-  - >
-    cmake -G "%VSVER%"
-    -DBLAS_LIBRARY:FILEPATH=%BLAS_LIBRARY%
-    -DLAPACK_LIBRARY:FILEPATH=%BLAS_LIBRARY%
-    -DCMAKE_PREFIX:FILEPATH="%APPVEYOR_BUILD_FOLDER%/armadillo"
-    -DBUILD_SHARED_LIBS=OFF
-    -DCMAKE_BUILD_TYPE=Release ..
-  - >
-    "%MSBUILD%" "C:\projects\mlpack\armadillo-8.400.0\build\armadillo.sln"
-    /m /verbosity:quiet /p:Configuration=Release;Platform=x64
-  - cd C:\projects\mlpack && mkdir build && cd build
-  - >
-    cmake -G "%VSVER%"
-    -DBLAS_LIBRARIES:FILEPATH=%BLAS_LIBRARY%
-    -DLAPACK_LIBRARIES:FILEPATH=%BLAS_LIBRARY%
-    -DARMADILLO_INCLUDE_DIR="C:/projects/mlpack/armadillo-8.400.0/include"
-    -DARMADILLO_LIBRARY:FILEPATH=%ARMADILLO_LIBRARY%
-    -DBOOST_INCLUDEDIR:PATH=%BOOST_INCLUDE%
-    -DBOOST_LIBRARYDIR:PATH="C:/projects/mlpack/boost_libs"
-    -DDEBUG=OFF
-    -DPROFILE=OFF
-    -DBUILD_PYTHON_BINDINGS=OFF
-    -DBUILD_GO_BINDINGS=OFF
-    -DBUILD_R_BINDINGS=OFF
-    -DCMAKE_BUILD_TYPE=Release ..
-  - >
-    "%MSBUILD%" "C:\projects\mlpack\build\mlpack.sln"
-    /m /verbosity:minimal /nologo /p:BuildInParallel=true
-    /p:Configuration=Release;Platform=x64
-
-  # Zip Artifacts.
-  - >
-    7z a mlpack-windows-no-libs.zip
-    "%APPVEYOR_BUILD_FOLDER%\build\Release\*.exe"
-  - >
-    7z a mlpack-windows.zip
-    "%APPVEYOR_BUILD_FOLDER%\build\Release\*.*"
-    "%APPVEYOR_BUILD_FOLDER%/OpenBLAS.0.2.14.1/lib/native/lib/x64/*.*"
-
-  # Pulling documentation for the installer.
-  - ps: >
-      try{(new-object net.webclient).DownloadFile(${env:JENKINS_DOC_DOWNLOAD},
-      'C:\projects\mlpack\dist\win-installer\jenkinsdoc.zip')}
-      catch{Write-Output "Unable to pull jenkins doc, skipping!"}
-  - ps: >
-      try{(Add-Type -AssemblyName System.IO.Compression.FileSystem);
-      [System.IO.Compression.ZipFile]::ExtractToDirectory(${env:JENKINS_DOC},
-      'C:\projects\mlpack\dist\win-installer\staging\doc')}
-      catch{Write-Output "Unable to add doc to installer, skipping!"}
-
-  # Preparing installer staging.
-  - cd C:\projects\mlpack\dist\win-installer\staging && mkdir lib
-  - ps: >
-      cp C:\projects\mlpack\build\Release\*.lib
-      C:\projects\mlpack\dist\win-installer\staging\lib\
-  - ps: >
-      cp C:\projects\mlpack\build\Release\*.exp
-      C:\projects\mlpack\dist\win-installer\staging\lib\
-  - ps: >
-      cp C:\projects\mlpack\build\Release\*.dll
-      C:\projects\mlpack\dist\win-installer\staging\
-  - ps: >
-      cp C:\projects\mlpack\build\Release\*.exe
-      C:\projects\mlpack\dist\win-installer\staging\
-  - ps: >
-      cp C:\projects\mlpack\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll
-      C:\projects\mlpack\dist\win-installer\staging\
-  - ps: >
-      cp C:\projects\mlpack\boost_libs\boost_unit_test_framework-vc*.dll
-      C:\projects\mlpack\dist\win-installer\staging\
-  - ps: >
-      cp C:\projects\mlpack\build\include\mlpack
-      C:\projects\mlpack\dist\win-installer\staging -recurse
-  - ps: >
-      cp C:\projects\mlpack\doc\examples
-      C:\projects\mlpack\dist\win-installer\staging -recurse
-  - ps: >
-      cp C:\projects\mlpack\src\mlpack\tests\data\german.csv
-      C:\projects\mlpack\dist\win-installer\staging\examples\sample-ml-app\sample-ml-app\data\
-
-  # Checking current gitversion or mlpack version.
-  - ps: >
-      $ver = (Get-Content
-      "${env:APPVEYOR_BUILD_FOLDER}\src\mlpack\core\util\version.hpp" |
-      where {$_ -like "*MLPACK_VERSION*"});
-      $env:MLPACK_VERSION += $ver[0].substring($ver[0].length - 1, 1) + '.';
-      $env:MLPACK_VERSION += $ver[1].substring($ver[1].length - 1, 1) + '.';
-      $env:MLPACK_VERSION += $ver[2].substring($ver[2].length - 1, 1);
-
-      if (Test-Path ${env:GIT_VERSION_FILE})
-      {
-        $ver = (Get-Content ${env:GIT_VERSION_FILE});
-        $env:INSTALL_VERSION = $ver.Split('"')[1].Split(' ')[1];
-      }
-      else
-      {
-        $env:INSTALL_VERSION = $env:MLPACK_VERSION;
-      }
-  - echo INSTALL_VERSION is %INSTALL_VERSION%
-
-  # Building MSI installer.
-  - cd C:\projects\mlpack\dist\win-installer\mlpack-win-installer
-  - >
-    heat dir ..\staging
-    -cg HeatGenerated
-    -dr INSTALLFOLDER
-    -sreg
-    -srd
-    -var var.HarvestPath
-    -ag
-    -sfrag
-    -out HeatGeneratedFileList.wxs
-  - >
-    candle -dHarvestPath=..\staging
-    -dConfiguration=Release
-    -dOutDir=bin\x64\Release\
-    -dPlatform=x64
-    -dProjectDir=.
-    -dProjectExt=.wixproj
-    -dProjectFileName=mlpack-win-installer.wixproj
-    -dProjectName=mlpack-win-installer
-    -dProjectPath=mlpack-win-installer.wixproj
-    -dTargetDir=.\bin\x64\Release\
-    -dTargetExt=.msi
-    -dTargetFileName=mlpack-windows.msi
-    -dTargetName=mlpack-windows
-    -dTargetPath=.\bin\x64\Release\mlpack-windows.msi
-    -out obj\x64\Release\
-    -arch x64
-    -ext "C:\Program Files (x86)\WiX Toolset v3.11\bin\\WixUIExtension.dll"
-    Product.wxs HeatGeneratedFileList.wxs
-  - >
-    light -out .\bin\x64\Release\mlpack-%INSTALL_VERSION%.msi
-    -pdbout .\bin\x64\Release\mlpack-windows.wixpdb
-    -cultures:null
-    -loc mlpack-localization.wxl
-    -ext "C:\Program Files (x86)\WiX Toolset v3.11\bin\\WixUIExtension.dll"
-    -contentsfile
-    obj\x64\Release\mlpack-win-installer.wixproj.BindContentsFileListnull.txt
-    -outputsfile
-    obj\x64\Release\mlpack-win-installer.wixproj.BindOutputsFileListnull.txt
-    -builtoutputsfile
-    obj\x64\Release\mlpack-win-installer.wixproj.BindBuiltOutputsFileListnull.txt
-    -wixprojectfile
-    mlpack-win-installer.wixproj
-    obj\x64\Release\Product.wixobj
-    obj\x64\Release\HeatGeneratedFileList.wixobj
-
-artifacts:
-  - path: 'build\*.zip'
-    name: mlpack-windows-zip
-
-  - path: 'dist\win-installer\mlpack-win-installer\bin\x64\Release\*.msi'
-    name: mlpack-windows-installer
-
-notifications:
-- provider: Email
-  to:
-  - mlpack-git@lists.mlpack.org
-  on_build_success: true
-  on_build_failure: true
-  on_build_status_changed: true
-
-cache:
-  - packages -> **\packages.config
-  - armadillo.tar.xz -> appveyor.yaml
-
-test_script:
-  # Copy all DLLs into the right place before running the test.
-  - ps: cp C:\projects\mlpack\boost_libs\*.* C:\projects\mlpack\build\
-  - ps: >
-      cp C:\projects\mlpack\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.*
-      C:\projects\mlpack\build\
-  - cd "%APPVEYOR_BUILD_FOLDER%/build/"
-  - >
-    Release\mlpack_test.exe
-    --report_level=detailed
-    --log_level=test_suite --log_format=XML > mlpack_test.xml & exit 0
-  # Attempt to upload results to AppVeyor.
-  - ps: >
-      $wc = New-Object 'System.Net.WebClient';
-      $wc.UploadFile(
-      "https://ci.appveyor.com/api/testresults/xunit/$($env:APPVEYOR_JOB_ID)",
-      (Resolve-Path .\mlpack_test.xml));
diff -pruN 3.4.2-7/board/crosscompile-toolchain.cmake 4.0.1-1/board/crosscompile-toolchain.cmake
--- 3.4.2-7/board/crosscompile-toolchain.cmake	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/board/crosscompile-toolchain.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,40 @@
+## This file handles cross-compilation configurations for aarch64,
+## also known as arm64. The objective of this file is to find and assign
+## the cross-compiler and the rest of the toolchain.
+##
+## This configuration works best with the buildroot toolchain.  When using this
+## file, be sure to set the TOOLCHAIN_PREFIX and CMAKE_SYSROOT variables,
+## preferably via the CMake configuration command (e.g. `-DCMAKE_SYSROOT=<...>`).
+##
+## Currently, we recommend using buildroot toolchain for
+## cross-compilation. Here is the link to download the toolchains:
+## https://toolchains.bootlin.com/
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSROOT)
+set(TOOLCHAIN_PREFIX "" CACHE STRING "Path for toolchain for cross compiler and other compilation tools.")
+
+# Ensure that CMake tries to build static libraries when testing the compiler.
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+set(CMAKE_AR "${TOOLCHAIN_PREFIX}gcc-ar" CACHE FILEPATH "" FORCE)
+set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}gcc)
+set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}g++)
+set(CMAKE_LINKER ${TOOLCHAIN_PREFIX}ld)
+set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> qcs <TARGET> <LINK_FLAGS> <OBJECTS>")
+set(CMAKE_C_ARCHIVE_FINISH  true)
+set(CMAKE_FORTRAN_COMPILER ${TOOLCHAIN_PREFIX}gfortran)
+set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER})
+set(CMAKE_OBJCOPY ${TOOLCHAIN_PREFIX}objcopy CACHE INTERNAL "objcopy tool")
+set(CMAKE_SIZE_UTIL ${TOOLCHAIN_PREFIX}size CACHE INTERNAL "size tool")
+
+## This is the standard root path if you are using the standard toolchain;
+## if you are using a different toolchain, you have to specify that path too.
+set(CMAKE_FIND_ROOT_PATH "${CMAKE_SYSROOT}")
+
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${CMAKE_SYSROOT}" CACHE INTERNAL "" FORCE)
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
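The toolchain file above is consumed at configure time via CMAKE_TOOLCHAIN_FILE. As a minimal sketch, assuming hypothetical buildroot paths (normally passed as -D options on the cmake command line), the two required variables could be seeded as:

    # Sketch only: these paths are placeholders for a buildroot toolchain,
    # not mlpack defaults.
    set(TOOLCHAIN_PREFIX "/opt/aarch64--glibc--stable/bin/aarch64-linux-"
        CACHE STRING "" FORCE)
    set(CMAKE_SYSROOT "/opt/aarch64--glibc--stable/aarch64-buildroot-linux-gnu/sysroot")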
diff -pruN 3.4.2-7/board/flags-config.cmake 4.0.1-1/board/flags-config.cmake
--- 3.4.2-7/board/flags-config.cmake	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/board/flags-config.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,69 @@
+# This file provides a set of specific flags for each supported board,
+# depending on the processor type. The objective is to optimize for size;
+# thus, all of the following flags are chosen carefully to reduce the
+# binary footprint.
+
+# Set generic minimization flags for all platforms.
+# These flags are the same for all cross-compilation cases.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Os -fdata-sections -ffunction-sections")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fno-unwind-tables")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-asynchronous-unwind-tables -fvisibility=hidden")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fshort-enums -finline-small-functions")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -findirect-inlining -fno-common")
+#-flto -fuse-ld=gold # There is an issue with the gold linker when compiling
+# on Ubuntu 16: at that time the gcc linker did not have LTO support built
+# in; it was a separate plugin that needed to be added. Therefore, these
+# flags can be added when the mlpack Azure CI moves to Ubuntu 20.
+
+set(BOARD_NAME "" CACHE STRING "Specify Board name to optimize for.")
+string(TOUPPER "${BOARD_NAME}" BOARD)
+
+# Set specific platforms CMAKE CXX flags.
+if(BOARD MATCHES "RPI0" OR BOARD MATCHES "RPI1")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mtune=arm1176jzf-s")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
+  set(OPENBLAS_TARGET "ARMV6")
+  set(OPENBLAS_BINARY "32")
+elseif(BOARD MATCHES "RPI2")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mtune=cortex-a7")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
+  set(OPENBLAS_TARGET "ARMV7")
+  set(OPENBLAS_BINARY "32")
+elseif(BOARD MATCHES "RPI3")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mtune=cortex-a53")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
+  set(OPENBLAS_TARGET "CORTEXA53")
+  set(OPENBLAS_BINARY "64")
+elseif(BOARD MATCHES "RPI4")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mtune=cortex-a72")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
+  set(OPENBLAS_TARGET "CORTEXA72")
+  set(OPENBLAS_BINARY "64")
+elseif(BOARD MATCHES "BV")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
+  set(OPENBLAS_TARGET "RISCV64_GENERIC")
+  set(OPENBLAS_BINARY "64")
+elseif(BOARD MATCHES "JETSONAGX")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mtune=cortex-a76")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
+  set(OPENBLAS_TARGET "ARMV8")
+  set(OPENBLAS_BINARY "64")
+elseif(BOARD MATCHES "KATAMI")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=pentium3")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
+  set(OPENBLAS_TARGET "KATAMI")
+  set(OPENBLAS_BINARY "32")
+elseif(BOARD MATCHES "COPPERMINE")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=pentium3")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
+  set(OPENBLAS_TARGET "COPPERMINE")
+  set(OPENBLAS_BINARY "32")
+elseif(BOARD MATCHES "NORTHWOOD")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=pentium4")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
+  set(OPENBLAS_TARGET "NORTHWOOD")
+  set(OPENBLAS_BINARY "32")
+elseif(BOARD)
+  ## TODO: update documentation with a list of the supported boards.
+  message(FATAL_ERROR "The given BOARD_NAME is not known; please choose a supported board from the list.")
+endif()
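For reference, a new board would slot into the chain above as one more elseif branch; this is only a sketch, with an assumed board name and a generic aarch64 OpenBLAS target:

    # Hypothetical entry; 'GENERICA64' is not one of the supported boards.
    elseif(BOARD MATCHES "GENERICA64")
      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
      set(OPENBLAS_TARGET "ARMV8")
      set(OPENBLAS_BINARY "64")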
diff -pruN 3.4.2-7/.ci/ci.yaml 4.0.1-1/.ci/ci.yaml
--- 3.4.2-7/.ci/ci.yaml	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/.ci/ci.yaml	2022-12-29 15:40:18.000000000 +0000
@@ -11,24 +11,24 @@ jobs:
 - job: Linux
   timeoutInMinutes: 360
   pool:
-    vmImage: ubuntu-16.04
+    vmImage: ubuntu-latest
   strategy:
     matrix:
       Plain:
-        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
+        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_TESTS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
       Python:
         binding: 'python'
         python.version: '3.7'
-        CMakeArgs: '-DDEBUG=OFF -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 -DBUILD_GO_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
+        CMakeArgs: '-DDEBUG=OFF -DPROFILE=OFF -DBUILD_TESTS=ON -DBUILD_PYTHON_BINDINGS=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 -DBUILD_GO_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
       Julia:
         julia.version: '1.3.0'
-        CMakeArgs: '-DDEBUG=OFF -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=ON -DBUILD_GO_BINDINGS=OFF -DJULIA_EXECUTABLE=/opt/julia-1.3.0/bin/julia -DBUILD_R_BINDINGS=OFF'
+        CMakeArgs: '-DDEBUG=OFF -DPROFILE=OFF -DBUILD_TESTS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=ON -DBUILD_GO_BINDINGS=OFF -DJULIA_EXECUTABLE=/opt/julia-1.6.3/bin/julia -DBUILD_R_BINDINGS=OFF'
       Go:
         binding: 'go'
         go.version: '1.11.0'
-        CMakeArgs: '-DDEBUG=OFF -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=ON -DBUILD_R_BINDINGS=OFF'
+        CMakeArgs: '-DDEBUG=OFF -DPROFILE=OFF -DBUILD_TESTS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=ON -DBUILD_R_BINDINGS=OFF'
       Markdown:
-        CMakeArgs: '-DDEBUG=OFF -DPROFILE=OFF -DBUILD_MARKDOWN_BINDINGS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
+        CMakeArgs: '-DDEBUG=OFF -DPROFILE=OFF -DBUILD_TESTS=ON -DBUILD_MARKDOWN_BINDINGS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
 
   steps:
   - template: linux-steps.yaml
@@ -40,44 +40,25 @@ jobs:
   strategy:
     matrix:
       Plain:
-        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
-        python.version: '2.7'
+        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_TESTS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
+        python.version: '3.7'
       Python:
         binding: 'python'
         python.version: '3.7'
-        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=ON -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
+        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_TESTS=ON -DBUILD_PYTHON_BINDINGS=ON -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
       Julia:
-        python.version: '2.7'
-        julia.version: '1.3.0'
-        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_JULIA_BINDINGS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
+        python.version: '3.7'
+        julia.version: '1.6.3'
+        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_TESTS=ON -DBUILD_JULIA_BINDINGS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
       Go:
         binding: 'go'
-        python.version: '2.7'
+        python.version: '3.7'
         go.version: '1.11.0'
-        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=ON -DBUILD_R_BINDINGS=OFF'
+        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_TESTS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=ON -DBUILD_R_BINDINGS=OFF'
 
   steps:
   - template: macos-steps.yaml
 
-- job: WindowsVS15
-  timeoutInMinutes: 360
-  displayName: Windows VS15
-  pool:
-    vmImage: vs2017-win2016
-  strategy:
-    matrix:
-      Plain:
-        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
-        python.version: '2.7'
-        CMakeGenerator: '-G "Visual Studio 15 2017 Win64"'
-        MSBuildVersion: '15.0'
-        ArchiveNoLibs: 'mlpack-windows-vs15-no-libs.zip'
-        ArchiveLibs: 'mlpack-windows-vs15.zip'
-        ArchiveTests: 'mlpack_test-vs15.xml'
-
-  steps:
-  - template: windows-steps.yaml
-
 - job: WindowsVS16
   timeoutInMinutes: 360
   displayName: Windows VS16
@@ -86,7 +67,7 @@ jobs:
   strategy:
     matrix:
       Plain:
-        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
+        CMakeArgs: '-DDEBUG=ON -DPROFILE=OFF -DBUILD_TESTS=ON -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF'
         python.version: '2.7'
         CMakeGenerator: '-G "Visual Studio 16 2019"'
         MSBuildVersion: '16.0'
diff -pruN 3.4.2-7/.ci/linux-steps.yaml 4.0.1-1/.ci/linux-steps.yaml
--- 3.4.2-7/.ci/linux-steps.yaml	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/.ci/linux-steps.yaml	2022-12-29 15:40:18.000000000 +0000
@@ -9,19 +9,20 @@ steps:
   inputs:
     versionSpec: '3.7'
 
-# Install build dependencies
+# Install build dependencies.
 - script: |
-    git clone --depth 1 https://github.com/mlpack/jenkins-conf.git conf
-
-    sudo add-apt-repository ppa:mhier/libboost-latest
-    sudo apt-get update
+    # Workaround to avoid a build failure caused by running out of memory.
+    free -h
+    df -h
+    sudo dd if=/dev/zero of=/usr/swapfile.img bs=1024 count=12M
+    sudo mkswap /usr/swapfile.img
+    sudo swapon /usr/swapfile.img
+    free -h
+    df -h
 
-    # Remove BOOST_ROOT from the environment to prevent attempting to use a
-    # boost which is incompatible with the compiler.
-    unset BOOST_ROOT
-    echo "##vso[task.setvariable variable=BOOST_ROOT]"$BOOST_ROOT
+    git clone --depth 1 https://github.com/mlpack/jenkins-conf.git conf
 
-    sudo apt-get install -y --allow-unauthenticated libopenblas-dev liblapack-dev g++ libboost1.70-dev libarmadillo-dev xz-utils
+    sudo apt-get install -y --allow-unauthenticated libopenblas-dev g++ xz-utils
 
     if [ "$(binding)" == "python" ]; then
       export PYBIN=$(which python)
@@ -30,13 +31,39 @@ steps:
     fi
 
     if [ "a$(julia.version)" != "a" ]; then
-      wget https://julialang-s3.julialang.org/bin/linux/x64/1.3/julia-1.3.0-linux-x86_64.tar.gz
-      sudo tar -C /opt/ -xvpf julia-1.3.0-linux-x86_64.tar.gz
+      wget https://julialang-s3.julialang.org/bin/linux/x64/1.6/julia-1.6.3-linux-x86_64.tar.gz
+      sudo tar -C /opt/ -xvpf julia-1.6.3-linux-x86_64.tar.gz
     fi
 
     # Install armadillo.
-    curl https://data.kurg.org/armadillo-8.400.0.tar.xz | tar -xvJ && cd armadillo*
-    cmake . && make && sudo make install && cd ..
+    curl -k -L https://sourceforge.net/projects/arma/files/armadillo-9.800.6.tar.xz | tar -xvJ && \
+        cd armadillo* && \
+        cmake . && \
+        make && \
+        sudo make install && \
+        cd ..
+
+    # Install ensmallen (unpacks into ensmallen-*/).
+    wget https://ensmallen.org/files/ensmallen-latest.tar.gz && \
+        tar -xvzpf ensmallen-latest.tar.gz && \
+        cd ensmallen-*/ && \
+        sudo cp -vr include/* /usr/include/ && \
+        cd ..
+
+    # Install STB (unpacks into stb/).
+    wget https://mlpack.org/files/stb.tar.gz && \
+        tar -xvzpf stb.tar.gz && \
+        cd stb && \
+        sudo cp -vr include/* /usr/include/ && \
+        cd ..
+
+    # Install cereal (unpacks into cereal-1.3.0/).
+    wget https://github.com/USCiLab/cereal/archive/v1.3.0.tar.gz && \
+        tar -xvzpf v1.3.0.tar.gz && \
+        cd cereal-1.3.0 && \
+        sudo cp -vr include/cereal /usr/include/ && \
+        cd ..
+
   displayName: 'Install Build Dependencies'
 
 # Configure mlpack (CMake)
@@ -45,19 +72,14 @@ steps:
     mkdir build && cd build
     if [ "$(binding)" == "go" ]; then
       export GOPATH=$PWD/src/mlpack/bindings/go
+      export GO111MODULE=off
       go get -u -t gonum.org/v1/gonum/...
     fi
-    cmake $(CMakeArgs) -DPYTHON_EXECUTABLE=`which python` ..
+    cmake $(CMakeArgs) -DPYTHON_EXECUTABLE=`which python` -DCEREAL_INCLUDE_DIR=/usr/include/ ..
   displayName: 'CMake'
 
 # Build mlpack
-- script: cd build && make
-  condition: eq(variables['CMakeArgs'], '-DDEBUG=ON -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF')
-  displayName: 'Build'
-
-# Build mlpack
 - script: cd build && make -j2
-  condition: ne(variables['CMakeArgs'], '-DDEBUG=ON -DPROFILE=OFF -DBUILD_PYTHON_BINDINGS=OFF -DBUILD_JULIA_BINDINGS=OFF -DBUILD_GO_BINDINGS=OFF -DBUILD_R_BINDINGS=OFF')
   displayName: 'Build'
 
 # Run tests via ctest.
diff -pruN 3.4.2-7/.ci/macos-steps.yaml 4.0.1-1/.ci/macos-steps.yaml
--- 3.4.2-7/.ci/macos-steps.yaml	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/.ci/macos-steps.yaml	2022-12-29 15:40:18.000000000 +0000
@@ -12,9 +12,9 @@ steps:
 # Install Build Dependencies
 - script: |
     set -e
-    sudo xcode-select --switch /Applications/Xcode_12.2.app/Contents/Developer
+    sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
     unset BOOST_ROOT
-    brew install openblas armadillo boost
+    brew install libomp openblas armadillo cereal ensmallen
 
     if [ "$(binding)" == "python" ]; then
       pip install --upgrade pip
@@ -22,10 +22,9 @@ steps:
     fi
 
     if [ "a$(julia.version)" != "a" ]; then
-      brew cask install julia
+      brew install --cask julia
     fi
 
-    git clone --depth 1 https://github.com/mlpack/jenkins-conf.git conf
   displayName: 'Install Build Dependencies'
 
 # Configure mlpack (CMake)
@@ -34,6 +33,7 @@ steps:
     mkdir build && cd build
     if [ "$(binding)" == "go" ]; then
       export GOPATH=$PWD/src/mlpack/bindings/go
+      export GO111MODULE=off
       go get -u -t gonum.org/v1/gonum/...
     fi
     if [ "$(binding)" == "python" ]; then
diff -pruN 3.4.2-7/.ci/windows-steps.yaml 4.0.1-1/.ci/windows-steps.yaml
--- 3.4.2-7/.ci/windows-steps.yaml	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/.ci/windows-steps.yaml	2022-12-29 15:40:18.000000000 +0000
@@ -9,30 +9,23 @@ steps:
 # Fetch build dependencies
 - powershell: |
     nuget install OpenBLAS -o $(Agent.ToolsDirectory)
-    nuget install boost -o $(Agent.ToolsDirectory) -Version 1.60.0
-    nuget install boost_unit_test_framework-vc140 -o $(Agent.ToolsDirectory) -Version 1.60.0
-    nuget install boost_program_options-vc140 -o $(Agent.ToolsDirectory) -Version 1.60.0
-    nuget install boost_random-vc140 -o $(Agent.ToolsDirectory) -Version 1.60.0
-    nuget install boost_serialization-vc140 -o $(Agent.ToolsDirectory) -Version 1.60.0
-    nuget install boost_math_c99-vc140 -o $(Agent.ToolsDirectory) -Version 1.60.0
-    nuget install OpenBLAS -o $(Agent.ToolsDirectory)
+    nuget install unofficial-flayan-cereal -o $(Agent.ToolsDirectory)
+    nuget install ensmallen -o $(Agent.ToolsDirectory) -Version 2.17.0
+    ## Delete all ensmallen dependencies, including armadillo headers; we do not need them here.
+    Remove-Item $(Agent.ToolsDirectory)\ensmallen.2.17.0\installed\x64-linux\share -Force -Recurse
+    Remove-Item $(Agent.ToolsDirectory)\ensmallen.2.17.0\installed\x64-linux\include\armadillo_bits -Force -Recurse
+    Remove-Item $(Agent.ToolsDirectory)\ensmallen.2.17.0\installed\x64-linux\include\armadillo -Force
 
-    mkdir -p $(Agent.ToolsDirectory)/boost_libs
-    cp $(Agent.ToolsDirectory)/boost_program_options-vc140.1.60.0.0/lib/native/address-model-64/lib/*.* $(Agent.ToolsDirectory)/boost_libs
-    cp $(Agent.ToolsDirectory)/boost_math_c99-vc140.1.60.0.0/lib/native/address-model-64/lib/*.* $(Agent.ToolsDirectory)/boost_libs
-    cp $(Agent.ToolsDirectory)/boost_random-vc140.1.60.0.0/lib/native/address-model-64/lib/*.* $(Agent.ToolsDirectory)/boost_libs
-    cp $(Agent.ToolsDirectory)/boost_serialization-vc140.1.60.0.0/lib/native/address-model-64/lib/*.* $(Agent.ToolsDirectory)/boost_libs
-    cp $(Agent.ToolsDirectory)/boost_unit_test_framework-vc140.1.60.0.0/lib/native/address-model-64/lib/*.* $(Agent.ToolsDirectory)/boost_libs
   displayName: 'Fetch build dependencies'
 
 # Configure armadillo
 - bash: |
     git clone --depth 1 https://github.com/mlpack/jenkins-conf.git conf
 
-    curl -O http://www.ratml.org/misc/armadillo-8.400.0.tar.gz -o armadillo-8.400.0.tar.gz
-    tar -xzvf armadillo-8.400.0.tar.gz
+    curl -O -L https://sourceforge.net/projects/arma/files/armadillo-9.800.6.tar.xz -o armadillo-9.800.6.tar.xz
+    tar -xvf armadillo-9.800.6.tar.xz
 
-    cd armadillo-8.400.0/ && cmake $(CMakeGenerator) \
+    cd armadillo-9.800.6/ && cmake $(CMakeGenerator) \
     -DBLAS_LIBRARY:FILEPATH=$(Agent.ToolsDirectory)/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a \
     -DLAPACK_LIBRARY:FILEPATH=$(Agent.ToolsDirectory)/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a \
     -DCMAKE_PREFIX:FILEPATH=../../armadillo \
@@ -43,7 +36,7 @@ steps:
 # Build armadillo
 - task: MSBuild@1
   inputs:
-    solution: 'armadillo-8.400.0/*.sln'
+    solution: 'armadillo-9.800.6/*.sln'
     msbuildLocationMethod: 'location'
     msbuildVersion: $(MSBuildVersion)
     configuration: 'Release'
@@ -62,10 +55,10 @@ steps:
     $(CMakeArgs) `
     -DBLAS_LIBRARIES:FILEPATH=$(Agent.ToolsDirectory)\OpenBLAS.0.2.14.1\lib\native\lib\x64\libopenblas.dll.a `
     -DLAPACK_LIBRARIES:FILEPATH=$(Agent.ToolsDirectory)\OpenBLAS.0.2.14.1\lib\native\lib\x64\libopenblas.dll.a `
-    -DARMADILLO_INCLUDE_DIR="..\armadillo-8.400.0\include" `
-    -DARMADILLO_LIBRARY="..\armadillo-8.400.0\Release\armadillo.lib" `
-    -DBOOST_INCLUDEDIR=$(Agent.ToolsDirectory)\boost.1.60.0.0\lib\native\include `
-    -DBOOST_LIBRARYDIR=$(Agent.ToolsDirectory)\boost_libs `
+    -DARMADILLO_INCLUDE_DIR="..\armadillo-9.800.6\tmp\include" `
+    -DARMADILLO_LIBRARY="..\armadillo-9.800.6\Release\armadillo.lib" `
+    -DCEREAL_INCLUDE_DIR=$(Agent.ToolsDirectory)\unofficial-flayan-cereal.1.2.2\build\native\include `
+    -DENSMALLEN_INCLUDE_DIR=$(Agent.ToolsDirectory)\ensmallen.2.17.0\installed\x64-linux\include `
     -DBUILD_JULIA_BINDINGS=OFF `
     -DCMAKE_BUILD_TYPE=Release ..
   displayName: 'Configure mlpack'
@@ -78,23 +71,23 @@ steps:
     msbuildVersion: $(MSBuildVersion)
     configuration: 'Release'
     msbuildArchitecture: 'x64'
-    platform: 'x64'
-    msbuildArguments: /m /p:BuildInParallel=true
     maximumCpuCount: false
     clean: false
   displayName: 'Build mlpack'
 
 # Configure mlpack
 - powershell: |
-    cp $(Agent.ToolsDirectory)\boost_libs\*.* build\Release\
-    cp $(Agent.ToolsDirectory)\OpenBLAS.0.2.14.1\lib\native\lib\x64\*.* build\Release\
+    # The .dlls are stored in the bin/ directory, and those are the ones we need
+    # to run with.
     cp $(Agent.ToolsDirectory)\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.* build\Release\
+    cp $(Agent.ToolsDirectory)\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.* build\
   displayName: 'Configure mlpack'
 
 # Run tests via ctest.
 - bash: |
     cd build
-    CTEST_OUTPUT_ON_FAILURE=1 ctest -T Test -C Release .
+    cmake --build . --target mlpack_test --config Release
+    CTEST_OUTPUT_ON_FAILURE=1 ctest -T Test -C Release . -j1
   displayName: 'Run tests via ctest'
 
 # Copy artifacts
@@ -122,6 +115,58 @@ steps:
     replaceExistingArchive: true
   displayName: 'Build artifacts'
 
+# Build MSI installer.
+- powershell: |
+    # Pull the documentation for the installer.
+    try {
+      $url = "http://ci.mlpack.org/job/mlpack%20-%20doxygen%20build/lastSuccessfulBuild/artifact/build/doc/html/*zip*/html.zip"
+      (new-object net.webclient).DownloadFile($url, 'dist\win-installer\jenkinsdoc.zip')
+    }
+    catch {
+      Write-Output "Unable to download precompiled Doxygen documentation from Jenkins!"
+    }
+    try {
+      (Add-Type -AssemblyName System.IO.Compression.FileSystem);
+      [System.IO.Compression.ZipFile]::ExtractToDirectory('dist\win-installer\jenkinsdoc.zip', 'dist\win-installer\mlpack-win-installer\Sources\doc')
+    }
+    catch { Write-Output "Unable to add doc to installer, skipping!" }
+    # Preparing installer staging.
+    mkdir dist\win-installer\mlpack-win-installer\Sources\lib
+    cp build\Release\*.lib dist\win-installer\mlpack-win-installer\Sources\lib\
+    cp build\Release\*.exp dist\win-installer\mlpack-win-installer\Sources\lib\
+    cp build\Release\*.dll dist\win-installer\mlpack-win-installer\Sources\
+    cp build\Release\*.exe dist\win-installer\mlpack-win-installer\Sources\
+    cp $(Agent.ToolsDirectory)\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll dist\win-installer\mlpack-win-installer\Sources\
+    cp src\mlpack\*.hpp dist\win-installer\mlpack-win-installer\Sources\
+    cp src\mlpack\core dist\win-installer\mlpack-win-installer\Sources\ -recurse
+    cp src\mlpack\methods dist\win-installer\mlpack-win-installer\Sources\ -recurse
+    cp doc\examples dist\win-installer\mlpack-win-installer\Sources\ -recurse
+    cp src\mlpack\tests\data\german.csv dist\win-installer\mlpack-win-installer\Sources\examples\sample-ml-app\sample-ml-app\data\
+    # Check current git version or mlpack version.
+    $ver = (Get-Content "src\mlpack\core\util\version.hpp" | where {$_ -like "*MLPACK_VERSION*"});
+    $env:MLPACK_VERSION += $ver[0].substring($ver[0].length - 1, 1) + '.';
+    $env:MLPACK_VERSION += $ver[1].substring($ver[1].length - 1, 1) + '.';
+    $env:MLPACK_VERSION += $ver[2].substring($ver[2].length - 1, 1);
+
+    if (Test-Path "src/mlpack/core/util/gitversion.hpp")
+    {
+      $ver = (Get-Content "src/mlpack/core/util/gitversion.hpp");
+      $env:INSTALL_VERSION = $ver.Split('"')[1].Split(' ')[1];
+    }
+    else
+    {
+      $env:INSTALL_VERSION = $env:MLPACK_VERSION;
+    }
+
+    # Build the MSI installer.
+    cd dist\win-installer\mlpack-win-installer
+    & 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin\MSBuild.exe' `
+        -t:rebuild `
+        -p:Configuration=Release `
+        -p:TreatWarningsAsErrors=True `
+        mlpack-win-installer.wixproj
+  displayName: 'Build MSI Windows installer'
+
 # Publish artifacts to Azure Pipelines
 - task: PublishBuildArtifacts@1
   inputs:
@@ -138,6 +183,11 @@ steps:
     pathtoPublish: 'build/Testing/'
     artifactName: 'Tests'
   displayName: 'Publish artifacts test results'
+- task: PublishBuildArtifacts@1
+  inputs:
+    pathtoPublish: 'dist\win-installer\mlpack-win-installer\bin\Release\mlpack-windows.msi'
+    artifactName: mlpack-windows-installer
+  displayName: 'Publish Windows MSI installer'
 
 # Publish test results to Azure Pipelines
 - task: PublishTestResults@2
diff -pruN 3.4.2-7/CMake/Autodownload.cmake 4.0.1-1/CMake/Autodownload.cmake
--- 3.4.2-7/CMake/Autodownload.cmake	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/Autodownload.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,49 @@
+## This macro auto-downloads mlpack dependencies.
+## You need to pass the LINK to download from, the name of
+## the dependency, and the name of the compressed package, such as
+## armadillo.tar.gz.
+## On each download, this macro sets a GENERIC_INCLUDE_DIR path,
+## which you can use to set the main include directory for each
+## package.
+## Note that the package must be compressed as .tar.gz.
+
+macro(get_deps LINK DEPS_NAME PACKAGE)
+  if (NOT EXISTS "${CMAKE_BINARY_DIR}/deps/${PACKAGE}")
+    file(DOWNLOAD ${LINK}
+           "${CMAKE_BINARY_DIR}/deps/${PACKAGE}"
+            STATUS DOWNLOAD_STATUS_LIST LOG DOWNLOAD_LOG
+            SHOW_PROGRESS)
+    list(GET DOWNLOAD_STATUS_LIST 0 DOWNLOAD_STATUS)
+    if (DOWNLOAD_STATUS EQUAL 0)
+      execute_process(COMMAND ${CMAKE_COMMAND} -E
+          tar xf "${CMAKE_BINARY_DIR}/deps/${PACKAGE}"
+          WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/deps/")
+    else ()
+      list(GET DOWNLOAD_STATUS_LIST 1 DOWNLOAD_ERROR)
+      message(FATAL_ERROR
+          "Could not download ${DEPS_NAME}! Error code ${DOWNLOAD_STATUS}: ${DOWNLOAD_ERROR}!  Error log: ${DOWNLOAD_LOG}")
+    endif()
+  endif()
+  # Get the name of the directory.
+  file (GLOB DIRECTORIES RELATIVE "${CMAKE_BINARY_DIR}/deps/"
+      "${CMAKE_BINARY_DIR}/deps/${DEPS_NAME}*.*")
+  if(${DEPS_NAME} MATCHES "stb")
+    file (GLOB DIRECTORIES RELATIVE "${CMAKE_BINARY_DIR}/deps/"
+        "${CMAKE_BINARY_DIR}/deps/${DEPS_NAME}")
+  endif()
+  # list(FILTER) is not available on CMake 3.5 or older, but try to keep
+  # configuring without filtering the list anyway
+  # (this works only if the file is present as .tar.gz).
+  if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.6.0")
+    list(FILTER DIRECTORIES EXCLUDE REGEX ".*\.tar\.gz")
+  endif ()
+  list(LENGTH DIRECTORIES DIRECTORIES_LEN)
+  if (DIRECTORIES_LEN GREATER 0)
+    list(GET DIRECTORIES 0 DEPENDENCY_DIR)
+    set(GENERIC_INCLUDE_DIR "${CMAKE_BINARY_DIR}/deps/${DEPENDENCY_DIR}/include")
+    install(DIRECTORY "${GENERIC_INCLUDE_DIR}/" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+  else ()
+    message(FATAL_ERROR
+            "Problem unpacking ${DEPS_NAME}! Expected only one directory ${DEPS_NAME};. Try to remove the directory ${CMAKE_BINARY_DIR}/deps and reconfigure.")
+  endif ()
+endmacro()
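A usage sketch for get_deps(); the URL and version here are illustrative rather than the pinned ones mlpack ships with:

    # Download and unpack a .tar.gz dependency, then pick up its headers
    # from the GENERIC_INCLUDE_DIR the macro sets.
    get_deps(https://www.ensmallen.org/files/ensmallen-2.19.0.tar.gz
             ensmallen ensmallen-2.19.0.tar.gz)
    set(ENSMALLEN_INCLUDE_DIR "${GENERIC_INCLUDE_DIR}")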
diff -pruN 3.4.2-7/CMake/CheckAtomic.cmake 4.0.1-1/CMake/CheckAtomic.cmake
--- 3.4.2-7/CMake/CheckAtomic.cmake	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/CheckAtomic.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,80 @@
+INCLUDE(CheckCXXSourceCompiles)
+INCLUDE(CheckLibraryExists)
+
+# Sometimes linking against libatomic is required for atomic ops, if
+# the platform doesn't support lock-free atomics.
+
+function(check_working_cxx_atomics varname)
+  set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11")
+  CHECK_CXX_SOURCE_COMPILES("
+#include <atomic>
+
+std::atomic<int> x;
+
+int main()
+{
+  return std::atomic_is_lock_free(&x);
+}
+" ${varname})
+  set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
+endfunction(check_working_cxx_atomics)
+
+function(check_working_cxx_atomics64 varname)
+  set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+  set(CMAKE_REQUIRED_FLAGS "-std=c++11 ${CMAKE_REQUIRED_FLAGS}")
+  CHECK_CXX_SOURCE_COMPILES("
+#include <atomic>
+#include <cstdint>
+
+std::atomic<uint64_t> x (0);
+
+int main()
+{
+  uint64_t i = x.load(std::memory_order_relaxed);
+  return std::atomic_is_lock_free(&x);
+}
+" ${varname})
+  set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
+endfunction(check_working_cxx_atomics64)
+
+
+# This isn't necessary on MSVC, so avoid command-line switch annoyance
+# by only running on GCC-like hosts.
+if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
+  # First check if atomics work without the library.
+  check_working_cxx_atomics(HAVE_CXX_ATOMICS_WITHOUT_LIB)
+  # If not, check if the library exists, and atomics work with it.
+  if(NOT HAVE_CXX_ATOMICS_WITHOUT_LIB)
+    check_library_exists(atomic __atomic_fetch_add_4 "" HAVE_LIBATOMIC)
+    if( HAVE_LIBATOMIC )
+      list(APPEND CMAKE_REQUIRED_LIBRARIES "atomic")
+      check_working_cxx_atomics(HAVE_CXX_ATOMICS_WITH_LIB)
+      if (NOT HAVE_CXX_ATOMICS_WITH_LIB)
+        message(FATAL_ERROR "Host compiler must support std::atomic!")
+      endif()
+    else()
+      message(FATAL_ERROR "Host compiler appears to require libatomic, but cannot find it.")
+    endif()
+  endif()
+endif()
+# Check for 64 bit atomic operations.
+if(MSVC)
+  set(HAVE_CXX_ATOMICS64_WITHOUT_LIB True)
+else()
+  check_working_cxx_atomics64(HAVE_CXX_ATOMICS64_WITHOUT_LIB)
+endif()
+
+# If not, check if the library exists, and atomics work with it.
+if(NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB)
+  check_library_exists(atomic __atomic_load_8 "" HAVE_CXX_LIBATOMICS64)
+  if(HAVE_CXX_LIBATOMICS64)
+    list(APPEND CMAKE_REQUIRED_LIBRARIES "atomic")
+    check_working_cxx_atomics64(HAVE_CXX_ATOMICS64_WITH_LIB)
+    if (NOT HAVE_CXX_ATOMICS64_WITH_LIB)
+      message(FATAL_ERROR "Host compiler must support std::atomic!")
+    endif()
+  else()
+    message(FATAL_ERROR "Host compiler appears to require libatomic, but cannot find it.")
+  endif()
+endif()
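When these checks conclude that libatomic is required, the consuming target still has to link it; a minimal sketch (the target name is hypothetical):

    if(HAVE_CXX_ATOMICS_WITH_LIB OR HAVE_CXX_ATOMICS64_WITH_LIB)
      target_link_libraries(my_target atomic)  # link libatomic explicitly
    endif()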
diff -pruN 3.4.2-7/CMake/ConfigureCrossCompile.cmake 4.0.1-1/CMake/ConfigureCrossCompile.cmake
--- 3.4.2-7/CMake/ConfigureCrossCompile.cmake	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/ConfigureCrossCompile.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,44 @@
+# This file adds the necessary configurations to cross-compile
+# mlpack for embedded systems. You need to set the following variables
+# from the command line: CMAKE_SYSROOT and TOOLCHAIN_PREFIX.
+# If OpenBLAS is not available on your system, this file will download
+# and compile it in order to provide the BLAS library.  If OpenBLAS will
+# be compiled, the OPENBLAS_TARGET variable must be set.  This can be done
+# by, e.g., setting BOARD_NAME (which will set OPENBLAS_TARGET in
+# `board/flags-config.cmake`).
+
+if (CMAKE_CROSSCOMPILING)
+  include(board/flags-config.cmake)
+  if (NOT CMAKE_SYSROOT AND (NOT TOOLCHAIN_PREFIX))
+    message(FATAL_ERROR "Neither CMAKE_SYSROOT nor TOOLCHAIN_PREFIX are set; please set both of them and try again.")
+  elseif(NOT CMAKE_SYSROOT)
+    message(FATAL_ERROR "Cannot configure: CMAKE_SYSROOT must be set when performing cross-compiling!")
+  elseif(NOT TOOLCHAIN_PREFIX)
+    message(FATAL_ERROR "Cannot configure: TOOLCHAIN_PREFIX must be set when performing cross-compiling!")
+  endif()
+endif()
+
+macro(search_openblas version)
+  set(BLA_STATIC ON)
+  find_package(BLAS)
+  if (NOT BLAS_FOUND OR (NOT BLAS_LIBRARIES))
+    if(NOT OPENBLAS_TARGET)
+      message(FATAL_ERROR "Cannot compile OpenBLAS: OPENBLAS_TARGET is not set.  Either set that variable, or set BOARD_NAME correctly!")
+    endif()
+    get_deps(https://github.com/xianyi/OpenBLAS/releases/download/v${version}/OpenBLAS-${version}.tar.gz OpenBLAS OpenBLAS-${version}.tar.gz)
+    if (NOT MSVC)
+      if (NOT EXISTS "${CMAKE_BINARY_DIR}/deps/OpenBLAS-${version}/libopenblas.a")
+        execute_process(COMMAND make TARGET=${OPENBLAS_TARGET} BINARY=${OPENBLAS_BINARY} HOSTCC=gcc CC=${CMAKE_C_COMPILER} FC=${CMAKE_FORTRAN_COMPILER} NO_SHARED=1
+                        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/deps/OpenBLAS-${version})
+      endif()
+      file(GLOB OPENBLAS_LIBRARIES "${CMAKE_BINARY_DIR}/deps/OpenBLAS-${version}/libopenblas.a")
+      set(BLAS_openblas_LIBRARY ${OPENBLAS_LIBRARIES})
+      set(LAPACK_openblas_LIBRARY ${OPENBLAS_LIBRARIES}) 
+      set(BLA_VENDOR OpenBLAS)
+      set(BLAS_FOUND ON)
+    endif()
+  endif()
+  find_library(GFORTRAN NAMES libgfortran.a)
+  find_library(PTHREAD NAMES libpthread.a)
+  set(CROSS_COMPILE_SUPPORT_LIBRARIES ${GFORTRAN} ${PTHREAD})
+endmacro()
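A sketch of how search_openblas() might be called from the main build; the OpenBLAS version is only an example:

    if (CMAKE_CROSSCOMPILING)
      # Find a static BLAS, or download and build OpenBLAS for OPENBLAS_TARGET.
      search_openblas(0.3.21)
      set(BLAS_LIBRARIES ${BLAS_openblas_LIBRARY} ${CROSS_COMPILE_SUPPORT_LIBRARIES})
    endif()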
diff -pruN 3.4.2-7/CMake/ConfigureFile.cmake 4.0.1-1/CMake/ConfigureFile.cmake
--- 3.4.2-7/CMake/ConfigureFile.cmake	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/ConfigureFile.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,11 @@
+# ConfigureFile.cmake: generate an mlpack binding file given input
+# arguments.
+#
+# This file depends on the following variables being set:
+#
+#  * GENERATE_CPP_IN: the .cpp.in file to configure.
+#  * GENERATE_CPP_OUT: the .cpp file we'll generate.
+#
+# Any other defined variables will be passed on to the file that is being
+# generated.
+configure_file(${GENERATE_CPP_IN} ${GENERATE_CPP_OUT})
diff -pruN 3.4.2-7/CMake/ConfigureGenerate.cmake 4.0.1-1/CMake/ConfigureGenerate.cmake
--- 3.4.2-7/CMake/ConfigureGenerate.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/ConfigureGenerate.cmake	1970-01-01 00:00:00.000000000 +0000
@@ -1,11 +0,0 @@
-# ConfigureGenerate.cmake: generate an mlpack binding file given input
-# arguments.
-#
-# This file depends on the following variables being set:
-#
-#  * GENERATE_CPP_IN: the .cpp.in file to configure.
-#  * GENERATE_CPP_OUT: the .cpp file we'll generate.
-#
-# Any other defined variables will be passed on to the file that is being
-# generated.
-configure_file(${GENERATE_CPP_IN} ${GENERATE_CPP_OUT})
diff -pruN 3.4.2-7/CMake/CreateArmaConfigInfo.cmake 4.0.1-1/CMake/CreateArmaConfigInfo.cmake
--- 3.4.2-7/CMake/CreateArmaConfigInfo.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/CreateArmaConfigInfo.cmake	1970-01-01 00:00:00.000000000 +0000
@@ -1,68 +0,0 @@
-# Using the CMake tools to create the file arma_config.hpp, which contains
-# information on the Armadillo configuration when mlpack was compiled.  This
-# assumes ${ARMADILLO_INCLUDE_DIR} is set.  In addition, we must be careful to
-# avoid overwriting arma_config.hpp with the exact same information, because
-# this may trigger a new complete rebuild, which is undesired.
-if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mlpack/core/util/arma_config.hpp")
-  file(READ "${CMAKE_CURRENT_SOURCE_DIR}/src/mlpack/core/util/arma_config.hpp"
-      OLD_FILE_CONTENTS)
-else()
-  set(OLD_FILE_CONTENTS "")
-endif()
-
-if(CMAKE_SIZEOF_VOID_P EQUAL 4)
-  set(ARMA_HAS_64BIT_WORD 0)
-else()
-  set(ARMA_HAS_64BIT_WORD 1)
-endif()
-
-# Now use the value we gathered to generate the new file contents.
-if(ARMA_HAS_64BIT_WORD EQUAL 0)
-  set(ARMA_64BIT_WORD_DEFINE "#define MLPACK_ARMA_NO64BIT_WORD")
-else()
-  set(ARMA_64BIT_WORD_DEFINE "#define MLPACK_ARMA_64BIT_WORD")
-endif()
-
-# Next we need to know if we are compiling with OpenMP support.
-# Other places in the CMake configuration should have already done the
-# find(OpenMP).
-if (OPENMP_FOUND)
-  set(ARMA_HAS_OPENMP_DEFINE "#define MLPACK_ARMA_USE_OPENMP")
-else ()
-  set(ARMA_HAS_OPENMP_DEFINE "#define MLPACK_ARMA_DONT_USE_OPENMP")
-endif ()
-
-set(NEW_FILE_CONTENTS
-"/**
- * @file arma_config.hpp
- *
- * This is an autogenerated file which contains the configuration of Armadillo
- * at the time mlpack was built.  If you modify anything in here by hand, your
- * warranty is void, your house may catch fire, and we're not going to call the
- * police when your program segfaults so hard that robbers come to your house
- * and take everything you own.  If you do decide, against better judgment, to
- * modify anything at all in this file, and you are reporting a bug, be
- * absolutely certain to mention that you've done something stupid in this file
- * first.
- *
- * In short: don't touch this file.
- */
-#ifndef MLPACK_CORE_UTIL_ARMA_CONFIG_HPP
-#define MLPACK_CORE_UTIL_ARMA_CONFIG_HPP
-
-${ARMA_64BIT_WORD_DEFINE}
-
-${ARMA_HAS_OPENMP_DEFINE}
-
-#endif
-")
-
-# Did the contents of the file change at all?  If not, don't write it.
-if(NOT "${OLD_FILE_CONTENTS}" STREQUAL "${NEW_FILE_CONTENTS}")
-  # We have a reason to write the new file.
-  message(STATUS "Regenerating arma_config.hpp.")
-  file(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/src/mlpack/core/util/arma_config.hpp")
-  file(WRITE "${CMAKE_CURRENT_SOURCE_DIR}/src/mlpack/core/util/arma_config.hpp"
-      "${NEW_FILE_CONTENTS}")
-endif()
-
diff -pruN 3.4.2-7/CMake/FindArmadillo.cmake 4.0.1-1/CMake/FindArmadillo.cmake
--- 3.4.2-7/CMake/FindArmadillo.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/FindArmadillo.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -77,26 +77,47 @@ else()
   # don't link to armadillo in this case
   set(ARMADILLO_LIBRARY "")
 endif()
+
 # Link to support libraries in either case on MSVC.
 if(NOT _ARMA_USE_WRAPPER OR MSVC)
   if(_ARMA_USE_LAPACK)
-    if(ARMADILLO_FIND_QUIETLY OR NOT ARMADILLO_FIND_REQUIRED)
-      find_package(LAPACK QUIET)
+    if(APPLE)
+      # Use -framework Accelerate to link against the Accelerate framework on
+      # MacOS; ignore OpenBLAS or other variants.
+      set(LAPACK_LIBRARIES "-framework Accelerate")
+      set(LAPACK_FOUND YES)
     else()
-      find_package(LAPCK REQUIRED)
+      if(ARMADILLO_FIND_QUIETLY OR NOT ARMADILLO_FIND_REQUIRED)
+        find_package(LAPACK QUIET)
+      else()
+        find_package(LAPACK REQUIRED)
+      endif()
     endif()
+
     if(LAPACK_FOUND)
       set(_ARMA_SUPPORT_LIBRARIES "${_ARMA_SUPPORT_LIBRARIES}" "${LAPACK_LIBRARIES}")
     endif()
   endif()
   if(_ARMA_USE_BLAS)
-    if(ARMADILLO_FIND_QUIETLY OR NOT ARMADILLO_FIND_REQUIRED)
-      find_package(BLAS QUIET)
+    if(APPLE)
+      # Use -framework Accelerate to link against the Accelerate framework on
+      # MacOS; ignore OpenBLAS or other variants.
+      set(BLAS_LIBRARIES "-framework Accelerate")
+      set(BLAS_FOUND YES)
     else()
-      find_package(BLAS REQUIRED)
+      if(ARMADILLO_FIND_QUIETLY OR NOT ARMADILLO_FIND_REQUIRED)
+        find_package(BLAS QUIET)
+      else()
+        find_package(BLAS REQUIRED)
+      endif()
     endif()
+
     if(BLAS_FOUND)
-      set(_ARMA_SUPPORT_LIBRARIES "${_ARMA_SUPPORT_LIBRARIES}" "${BLAS_LIBRARIES}")
+      # Avoid linking twice (not that it makes much difference other than a
+      # nicer command line).
+      if (NOT BLAS_LIBRARIES STREQUAL LAPACK_LIBRARIES)
+        set(_ARMA_SUPPORT_LIBRARIES "${_ARMA_SUPPORT_LIBRARIES}" "${BLAS_LIBRARIES}")
+      endif ()
     endif()
   endif()
   if(_ARMA_USE_ARPACK)
@@ -154,5 +175,6 @@ unset(__ARMA_SUPPORT_INCLUDE_DIRS)
 
 # Hide internal variables
 mark_as_advanced(
-  ARMADILLO_INCLUDE_DIR
-  ARMADILLO_LIBRARY)
+    ARMADILLO_INCLUDE_DIR
+    ARMADILLO_LIBRARY
+    ARMADILLO_LIBRARIES)
diff -pruN 3.4.2-7/CMake/Findcereal.cmake 4.0.1-1/CMake/Findcereal.cmake
--- 3.4.2-7/CMake/Findcereal.cmake	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/Findcereal.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,56 @@
+# Findcereal.cmake
+find_path(CEREAL_INCLUDE_DIR
+  NAMES cereal
+  PATHS "$ENV{ProgramFiles}/cereal/include"
+  )
+
+if(CEREAL_INCLUDE_DIR)
+  # ------------------------------------------------------------------------
+  #  Extract version information from <CEREAL>
+  # ------------------------------------------------------------------------
+  set(CEREAL_FOUND YES)
+  set(CEREAL_VERSION_MAJOR 0)
+  set(CEREAL_VERSION_MINOR 0)
+  set(CEREAL_VERSION_PATCH 0)
+
+  if(EXISTS "${CEREAL_INCLUDE_DIR}/cereal/version.hpp")
+
+    # Read and parse cereal version header file for version number
+    file(READ "${CEREAL_INCLUDE_DIR}/cereal/version.hpp"
+        _CEREAL_HEADER_CONTENTS)
+    string(REGEX REPLACE ".*#define CEREAL_VERSION_MAJOR ([0-9]+).*" "\\1"
+        CEREAL_VERSION_MAJOR "${_CEREAL_HEADER_CONTENTS}")
+    string(REGEX REPLACE ".*#define CEREAL_VERSION_MINOR ([0-9]+).*" "\\1"
+        CEREAL_VERSION_MINOR "${_CEREAL_HEADER_CONTENTS}")
+    string(REGEX REPLACE ".*#define CEREAL_VERSION_PATCH ([0-9]+).*" "\\1"
+        CEREAL_VERSION_PATCH "${_CEREAL_HEADER_CONTENTS}")
+
+  elseif(EXISTS "${CEREAL_INCLUDE_DIR}/cereal/details/polymorphic_impl_fwd.hpp")
+
+    set(CEREAL_VERSION_MAJOR 1)
+    set(CEREAL_VERSION_MINOR 2)
+    set(CEREAL_VERSION_PATCH 0)
+  elseif(EXISTS "${CEREAL_INCLUDE_DIR}/cereal/types/valarray.hpp")
+
+    set(CEREAL_VERSION_MAJOR 1)
+    set(CEREAL_VERSION_MINOR 1)
+    set(CEREAL_VERSION_PATCH 2)
+  elseif(EXISTS "${CEREAL_INCLUDE_DIR}/cereal/cereal.hpp")
+
+    set(CEREAL_VERSION_MAJOR 1)
+    set(CEREAL_VERSION_MINOR 1)
+    set(CEREAL_VERSION_PATCH 1)
+  else()
+
+    set(CEREAL_FOUND NO)
+  endif()
+  set(CEREAL_VERSION_STRING "${CEREAL_VERSION_MAJOR}.${CEREAL_VERSION_MINOR}.${CEREAL_VERSION_PATCH}")
+endif ()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(cereal
+  REQUIRED_VARS CEREAL_INCLUDE_DIR
+  VERSION_VAR CEREAL_VERSION_STRING
+  )
+
+mark_as_advanced(CEREAL_INCLUDE_DIR)
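A minimal consumer of this module, assuming the CMake/ directory is on CMAKE_MODULE_PATH:

    list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake")
    # The version argument is checked against CEREAL_VERSION_STRING.
    find_package(cereal 1.1.2 REQUIRED)
    include_directories(${CEREAL_INCLUDE_DIR})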
diff -pruN 3.4.2-7/CMake/FindGo.cmake 4.0.1-1/CMake/FindGo.cmake
--- 3.4.2-7/CMake/FindGo.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/FindGo.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -14,8 +14,8 @@ if (GO_EXECUTABLE)
        RESULT_VARIABLE RESULT
   )
   if (RESULT EQUAL 0)
-    string(REGEX REPLACE ".*([0-9]+\\.[0-9]+\(\\.[0-9]+\)?).*" "\\1"
-        GO_VERSION_STRING ${GO_VERSION_STRING})
+    string(REGEX MATCH "([0-9]+\\.[0-9]+\(\\.[0-9]+\)?)"
+        GO_VERSION_STRING "${GO_VERSION_STRING}")
   endif()
 endif()
 
diff -pruN 3.4.2-7/CMake/FindGonum.cmake 4.0.1-1/CMake/FindGonum.cmake
--- 3.4.2-7/CMake/FindGonum.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/FindGonum.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -4,20 +4,21 @@
 if (GO_EXECUTABLE)
   execute_process(
      COMMAND ${GO_EXECUTABLE} list gonum.org/v1/gonum/mat
-     OUTPUT_VARIABLE GONUM_VERSION_STRING
+     OUTPUT_VARIABLE GONUM_RAW_STRING
      RESULT_VARIABLE RESULT
   )
   if (RESULT EQUAL 0)
-    string(REGEX REPLACE ".*([0-9]+\\.[0-9]+\\.[0-9]+[\n]+).*" "\\1"
-        GONUM_VERSION_STRING ${GONUM_VERSION_STRING})
     string(REGEX REPLACE "\n$" ""
-        GONUM_VERSION_STRING ${GONUM_VERSION_STRING})
+        GONUM_RAW_STRING ${GONUM_RAW_STRING})
+    if ("${GONUM_RAW_STRING}" STREQUAL "gonum.org/v1/gonum/mat")
+      set(GONUM_FOUND 1)
+    endif()
   endif()
 endif()
 
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(
   Gonum
-  REQUIRED_VARS GONUM_VERSION_STRING
+  REQUIRED_VARS GONUM_FOUND
   FAIL_MESSAGE "Gonum not found"
 )
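For context, a hedged sketch of how the Go-related modules chain together at configure time (Gonum detection only makes sense once a Go executable is found):

    find_package(Go 1.11.0)
    if (GO_FOUND)
      find_package(Gonum)
    endif ()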
diff -pruN 3.4.2-7/CMake/FindMatlabMex.cmake 4.0.1-1/CMake/FindMatlabMex.cmake
--- 3.4.2-7/CMake/FindMatlabMex.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/FindMatlabMex.cmake	1970-01-01 00:00:00.000000000 +0000
@@ -1,110 +0,0 @@
-# This module looks for mex, the MATLAB compiler.
-# The following variables are defined when the script completes:
-#   MATLAB_MEX: location of mex compiler
-#   MATLAB_ROOT: root of MATLAB installation
-#   MATLABMEX_FOUND: 0 if not found, 1 if found
-
-set(MATLABMEX_FOUND 0)
-
-if(WIN32)
-  # This is untested but taken from the older FindMatlab.cmake script as well as
-  # the modifications by Ramon Casero and Tom Doel for Gerardus.
-
-  # Search for a version of Matlab available, starting from the most modern one
-  # to older versions.
-  foreach(MATVER "7.20" "7.19" "7.18" "7.17" "7.16" "7.15" "7.14" "7.13" "7.12"
-"7.11" "7.10" "7.9" "7.8" "7.7" "7.6" "7.5" "7.4")
-    if((NOT DEFINED MATLAB_ROOT)
-        OR ("${MATLAB_ROOT}" STREQUAL "")
-        OR ("${MATLAB_ROOT}" STREQUAL "/registry"))
-      get_filename_component(MATLAB_ROOT
-        "[HKEY_LOCAL_MACHINE\\SOFTWARE\\MathWorks\\MATLAB\\${MATVER};MATLABROOT]"
-        ABSOLUTE)
-      set(MATLAB_VERSION ${MATVER})
-    endif()
-      OR ("${MATLAB_ROOT}" STREQUAL "")
-      OR ("${MATLAB_ROOT}" STREQUAL "/registry"))
-  endforeach()
-
-  find_program(MATLAB_MEX
-    mex
-    ${MATLAB_ROOT}/bin
-    )
-else()
-  # Check if this is a Mac.
-  if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-    # This code is untested but taken from the older FindMatlab.cmake script as
-    # well as the modifications by Ramon Casero and Tom Doel for Gerardus.
-
-   set(LIBRARY_EXTENSION .dylib)
-
-    # If this is a Mac and the attempts to find MATLAB_ROOT have so far failed,~
-    # we look in the applications folder
-    if((NOT DEFINED MATLAB_ROOT) OR ("${MATLAB_ROOT}" STREQUAL ""))
-
-    # Search for a version of Matlab available, starting from the most modern
-    # one to older versions
-      foreach(MATVER "R2013b" "R2013a" "R2012b" "R2012a" "R2011b" "R2011a"
-"R2010b" "R2010a" "R2009b" "R2009a" "R2008b")
-        if((NOT DEFINED MATLAB_ROOT) OR ("${MATLAB_ROOT}" STREQUAL ""))
-          if(EXISTS /Applications/MATLAB_${MATVER}.app)
-            set(MATLAB_ROOT /Applications/MATLAB_${MATVER}.app)
-
-          endif()
-        endif()
-      endforeach()
-
-    endif()
-
-    find_program(MATLAB_MEX
-      mex
-      PATHS
-      ${MATLAB_ROOT}/bin
-    )
-
-  else()
-    # On a Linux system.  The goal is to find MATLAB_ROOT.
-    set(LIBRARY_EXTENSION .so)
-
-    find_program(MATLAB_MEX_POSSIBLE_LINK
-      mex
-      PATHS
-      ${MATLAB_ROOT}/bin
-      /opt/matlab/bin
-      /usr/local/matlab/bin
-      $ENV{HOME}/matlab/bin
-      # Now all the versions
-      /opt/matlab/[rR]20[0-9][0-9][abAB]/bin
-      /usr/local/matlab/[rR]20[0-9][0-9][abAB]/bin
-      /opt/matlab-[rR]20[0-9][0-9][abAB]/bin
-      /opt/matlab_[rR]20[0-9][0-9][abAB]/bin
-      /usr/local/matlab-[rR]20[0-9][0-9][abAB]/bin
-      /usr/local/matlab_[rR]20[0-9][0-9][abAB]/bin
-      $ENV{HOME}/matlab/[rR]20[0-9][0-9][abAB]/bin
-      $ENV{HOME}/matlab-[rR]20[0-9][0-9][abAB]/bin
-      $ENV{HOME}/matlab_[rR]20[0-9][0-9][abAB]/bin
-    )
-
-    get_filename_component(MATLAB_MEX "${MATLAB_MEX_POSSIBLE_LINK}" REALPATH)
-    get_filename_component(MATLAB_BIN_ROOT "${MATLAB_MEX}" PATH)
-    # Strip ./bin/.
-    get_filename_component(MATLAB_ROOT "${MATLAB_BIN_ROOT}" PATH)
-  endif()
-endif()
-
-if(NOT EXISTS "${MATLAB_MEX}" AND "${MatlabMex_FIND_REQUIRED}")
-  message(FATAL_ERROR "Could not find MATLAB mex compiler; try specifying MATLAB_ROOT.")
-else()
-  if(EXISTS "${MATLAB_MEX}")
-    message(STATUS "Found MATLAB mex compiler: ${MATLAB_MEX}")
-    message(STATUS "MATLAB root: ${MATLAB_ROOT}")
-    set(MATLABMEX_FOUND 1)
-  endif()
-endif()
-
-mark_as_advanced(
-  MATLAB_MEX
-  MATLABMEX_FOUND
-  MATLAB_ROOT
-)
-
diff -pruN 3.4.2-7/CMake/FindRModule.cmake 4.0.1-1/CMake/FindRModule.cmake
--- 3.4.2-7/CMake/FindRModule.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/FindRModule.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -25,9 +25,12 @@ function(find_r_module module)
             OUTPUT_VARIABLE _version_compare
             OUTPUT_STRIP_TRAILING_WHITESPACE)
 
-        string(REGEX MATCHALL "‘[0-9._]*’" _version_compare "${_version_compare}")
-        string(REGEX REPLACE "‘" "" _version_compare "${_version_compare}")
-        string(REGEX REPLACE "’" "" _version_compare "${_version_compare}")
+        # Different versions of R may enclose the version number in different
+        # delimiters.  Sometimes, semicolons show up too.
+        string(REGEX MATCHALL "[‘'][0-9._]*[’']" _version_compare "${_version_compare}")
+        string(REGEX REPLACE ";" "" _version_compare "${_version_compare}")
+        string(REGEX REPLACE "[‘']" "" _version_compare "${_version_compare}")
+        string(REGEX REPLACE "[’']" "" _version_compare "${_version_compare}")
 
         # Compare the version of the package using compareVersion().
         execute_process(COMMAND ${RSCRIPT_EXECUTABLE} "-e"
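
The delimiter stripping above can be exercised in isolation; the sample string
below imitates the `packageVersion()` output printed by Rscript (the exact
quote characters vary across R versions, hence the bracketed character
classes).  A sketch runnable with `cmake -P`:

    # Assumed raw Rscript output; some R versions print plain single quotes.
    set(_version_compare "[1] ‘1.2.3’")

    string(REGEX MATCHALL "[‘'][0-9._]*[’']" _version_compare "${_version_compare}")
    string(REGEX REPLACE ";" "" _version_compare "${_version_compare}")
    string(REGEX REPLACE "[‘']" "" _version_compare "${_version_compare}")
    string(REGEX REPLACE "[’']" "" _version_compare "${_version_compare}")

    message(STATUS "Extracted version: ${_version_compare}")  # prints 1.2.3
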
diff -pruN 3.4.2-7/CMake/GeneratePkgConfig.cmake 4.0.1-1/CMake/GeneratePkgConfig.cmake
--- 3.4.2-7/CMake/GeneratePkgConfig.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/GeneratePkgConfig.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -5,13 +5,13 @@
 # And our goal in this file is to generate/configure mlpack.pc.
 
 # First, we need to extract the version string.
-if (NOT EXISTS "${CMAKE_BINARY_DIR}/include/mlpack/core/util/version.hpp")
+if (NOT EXISTS "${MLPACK_SOURCE_DIR}/src/mlpack/core/util/version.hpp")
   message(FATAL_ERROR "Cannot open "
-      "${CMAKE_BINARY_DIR}/include/mlpack/core/util/version.hpp to extract "
+      "${MLPACK_SOURCE_DIR}/src/mlpack/core/util/version.hpp to extract "
       "version!")
 endif ()
 
-file(READ "${CMAKE_BINARY_DIR}/include/mlpack/core/util/version.hpp"
+file(READ "${MLPACK_SOURCE_DIR}/src/mlpack/core/util/version.hpp"
     VERSION_HPP_CONTENTS)
 string(REGEX REPLACE ".*#define MLPACK_VERSION_MAJOR ([0-9]+).*" "\\1"
     MLPACK_VERSION_MAJOR "${VERSION_HPP_CONTENTS}")
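
The extraction itself is a plain regex capture over the whole header.  A
self-contained sketch (runnable with `cmake -P`; the string below stands in
for the real version.hpp contents read by file(READ)):

    set(VERSION_HPP_CONTENTS
        "#define MLPACK_VERSION_MAJOR 4\n#define MLPACK_VERSION_MINOR 0\n#define MLPACK_VERSION_PATCH 1\n")

    string(REGEX REPLACE ".*#define MLPACK_VERSION_MAJOR ([0-9]+).*" "\\1"
        MLPACK_VERSION_MAJOR "${VERSION_HPP_CONTENTS}")
    string(REGEX REPLACE ".*#define MLPACK_VERSION_MINOR ([0-9]+).*" "\\1"
        MLPACK_VERSION_MINOR "${VERSION_HPP_CONTENTS}")

    # Prints "mlpack version: 4.0".
    message(STATUS "mlpack version: ${MLPACK_VERSION_MAJOR}.${MLPACK_VERSION_MINOR}")
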
diff -pruN 3.4.2-7/CMake/go/AppendModel.cmake 4.0.1-1/CMake/go/AppendModel.cmake
--- 3.4.2-7/CMake/go/AppendModel.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/go/AppendModel.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -44,8 +44,8 @@ function(append_model SERIALIZATION_FILE
           else ()
             string(APPEND GOMODEL_SAFE_TYPE ${MODEL_CHAR})
           endif()
-        endif()
-     endforeach()
+        endforeach()
+      endif()
 
       # See if the model type already exists.
       file(READ "${SERIALIZATION_FILE}" SERIALIZATION_FILE_CONTENTS)
@@ -64,20 +64,22 @@ function(append_model SERIALIZATION_FILE
             "  mem unsafe.Pointer \n"
             "}\n\n"
             "func (m *${GOMODEL_SAFE_TYPE}) alloc"
-            "${MODEL_SAFE_TYPE}(identifier string) {\n"
-            "  m.mem = C.mlpackGet${MODEL_SAFE_TYPE}Ptr(C.CString(identifier))\n"
+            "${MODEL_SAFE_TYPE}(params *params, identifier string) {\n"
+            "  m.mem = C.mlpackGet${MODEL_SAFE_TYPE}Ptr(params.mem,\n"
+            "      C.CString(identifier))\n"
             "  runtime.KeepAlive(m)\n"
             "}\n\n"
             "func (m *${GOMODEL_SAFE_TYPE}) get"
-            "${MODEL_SAFE_TYPE}(identifier string) {\n"
-            "  m.alloc${MODEL_SAFE_TYPE}(identifier)\n"
+            "${MODEL_SAFE_TYPE}(params *params, identifier string) {\n"
+            "  m.alloc${MODEL_SAFE_TYPE}(params, identifier)\n"
             "}\n\n"
-            "func set${MODEL_SAFE_TYPE}(identifier string, ptr *"
-            "${GOMODEL_SAFE_TYPE}) {\n"
-            " C.mlpackSet${MODEL_SAFE_TYPE}"
-            "Ptr(C.CString(identifier), (unsafe.Pointer)(ptr.mem))\n"
+            "func set${MODEL_SAFE_TYPE}(params* params,\n"
+            "                           identifier string,\n"
+            "                           ptr *${GOMODEL_SAFE_TYPE}) {\n"
+            "  C.mlpackSet${MODEL_SAFE_TYPE}Ptr(params.mem,\n"
+            "      C.CString(identifier), ptr.mem)\n"
             "}\n\n")
-      endif ()
+      endif()
     endforeach ()
   endif()
 endfunction()
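
The "see if the model type already exists" guard above amounts to an
append-once pattern: read the generated file back and only file(APPEND) when
the type is missing.  A reduced sketch (runnable with `cmake -P`; the file
name, type name, and appended snippet are hypothetical):

    set(SERIALIZATION_FILE "serialization_demo.go")
    set(GOMODEL_SAFE_TYPE "kmeansModel")

    if (NOT EXISTS "${SERIALIZATION_FILE}")
      file(WRITE "${SERIALIZATION_FILE}" "")
    endif()

    file(READ "${SERIALIZATION_FILE}" SERIALIZATION_FILE_CONTENTS)
    if (NOT SERIALIZATION_FILE_CONTENTS MATCHES "type ${GOMODEL_SAFE_TYPE} struct")
      # Running the script a second time appends nothing.
      file(APPEND "${SERIALIZATION_FILE}"
          "type ${GOMODEL_SAFE_TYPE} struct {\n  mem unsafe.Pointer\n}\n")
    endif()
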
diff -pruN 3.4.2-7/CMake/go/ConfigureGoHCPP.cmake 4.0.1-1/CMake/go/ConfigureGoHCPP.cmake
--- 3.4.2-7/CMake/go/ConfigureGoHCPP.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/go/ConfigureGoHCPP.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -24,10 +24,13 @@ if (${NUM_MODEL_TYPES} GREATER 0)
     # Generate the definition.
     set(MODEL_PTR_DEFNS "${MODEL_PTR_DEFNS}
 // Set the pointer to a ${MODEL_TYPE} parameter.
-extern void mlpackSet${MODEL_SAFE_TYPE}Ptr(const char* identifier, void* value);
+extern void mlpackSet${MODEL_SAFE_TYPE}Ptr(void* params,
+                                           const char* identifier,
+                                           void* value);
 
 // Get the pointer to a ${MODEL_TYPE} parameter.
-extern void* mlpackGet${MODEL_SAFE_TYPE}Ptr(const char* identifier);
+extern void* mlpackGet${MODEL_SAFE_TYPE}Ptr(void* params,
+                                            const char* identifier);
 "
 )
 
@@ -35,17 +38,22 @@ extern void* mlpackGet${MODEL_SAFE_TYPE}
     set(MODEL_PTR_IMPLS "${MODEL_PTR_IMPLS}
 // Set the pointer to a ${MODEL_TYPE} parameter.
 extern \"C\"  void mlpackSet${MODEL_SAFE_TYPE}Ptr(
+    void* params,
     const char* identifier,
     void* value)
 {
-  mlpack::util::SetParamPtr<${MODEL_TYPE}>(identifier,
-  static_cast<${MODEL_TYPE}*>(value));
+  util::Params& p = *((util::Params*) params);
+  mlpack::util::SetParamPtr<${MODEL_TYPE}>(p, identifier,
+      static_cast<${MODEL_TYPE}*>(value));
 }
 
 // Get the pointer to a ${MODEL_TYPE} parameter.
-extern \"C\" void *mlpackGet${MODEL_SAFE_TYPE}Ptr(const char* identifier)
+extern \"C\" void *mlpackGet${MODEL_SAFE_TYPE}Ptr(
+    void* params,
+    const char* identifier)
 {
-  ${MODEL_TYPE} *modelptr = IO::GetParam<${MODEL_TYPE}*>(identifier);
+  util::Params& p = *((util::Params*) params);
+  ${MODEL_TYPE} *modelptr = p.Get<${MODEL_TYPE}*>(identifier);
   return modelptr;
 }
 ")
diff -pruN 3.4.2-7/CMake/julia/AppendType.cmake 4.0.1-1/CMake/julia/AppendType.cmake
--- 3.4.2-7/CMake/julia/AppendType.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/julia/AppendType.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -32,8 +32,19 @@ function(append_type TYPES_FILE PROGRAM_
         # function.
         file(APPEND
             "${TYPES_FILE}"
-            "struct ${MODEL_SAFE_TYPE}\n"
+            "mutable struct ${MODEL_SAFE_TYPE}\n"
             "  ptr::Ptr{Nothing}\n"
+            "\n"
+            "  # Construct object and set finalizer to free memory if `finalize` is true.\n"
+            "  function ${MODEL_SAFE_TYPE}(ptr::Ptr{Nothing}; finalize::Bool = false)::${MODEL_SAFE_TYPE}\n"
+            "    result = new(ptr)\n"
+            "    if finalize\n"
+            "      finalizer(\n"
+            "          x -> _Internal.${PROGRAM_NAME}_internal.Delete${MODEL_SAFE_TYPE}(x.ptr),\n"
+            "          result)\n"
+            "    end\n"
+            "    return result\n"
+            "  end\n"
             "end\n"
             "\n")
       endif ()
diff -pruN 3.4.2-7/CMake/julia/ConfigureJuliaHCPP.cmake 4.0.1-1/CMake/julia/ConfigureJuliaHCPP.cmake
--- 3.4.2-7/CMake/julia/ConfigureJuliaHCPP.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/julia/ConfigureJuliaHCPP.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -26,9 +26,13 @@ if (${NUM_MODEL_TYPES} GREATER 0)
     # Generate the definition.
     set(MODEL_PTR_DEFNS "${MODEL_PTR_DEFNS}
 // Get the pointer to a ${MODEL_TYPE} parameter.
-void* IO_GetParam${MODEL_SAFE_TYPE}Ptr(const char* paramName);
+void* GetParam${MODEL_SAFE_TYPE}Ptr(void* params, const char* paramName);
 // Set the pointer to a ${MODEL_TYPE} parameter.
-void IO_SetParam${MODEL_SAFE_TYPE}Ptr(const char* paramName, void* ptr);
+void SetParam${MODEL_SAFE_TYPE}Ptr(void* params,
+                                   const char* paramName,
+                                   void* ptr);
+// Delete a ${MODEL_TYPE} pointer.
+void Delete${MODEL_SAFE_TYPE}Ptr(void* ptr);
 // Serialize a ${MODEL_TYPE} pointer.
 char* Serialize${MODEL_SAFE_TYPE}Ptr(void* ptr, size_t* length);
 // Deserialize a ${MODEL_TYPE} pointer.
@@ -38,16 +42,27 @@ void* Deserialize${MODEL_SAFE_TYPE}Ptr(c
     # Generate the implementation.
     set(MODEL_PTR_IMPLS "${MODEL_PTR_IMPLS}
 // Get the pointer to a ${MODEL_TYPE} parameter.
-void* IO_GetParam${MODEL_SAFE_TYPE}Ptr(const char* paramName)
+void* GetParam${MODEL_SAFE_TYPE}Ptr(void* params, const char* paramName)
 {
-  return (void*) IO::GetParam<${MODEL_TYPE}*>(paramName);
+  util::Params* p = (util::Params*) params;
+  return (void*) p->Get<${MODEL_TYPE}*>(paramName);
 }
 
 // Set the pointer to a ${MODEL_TYPE} parameter.
-void IO_SetParam${MODEL_SAFE_TYPE}Ptr(const char* paramName, void* ptr)
+void SetParam${MODEL_SAFE_TYPE}Ptr(void* params,
+                                   const char* paramName,
+                                   void* ptr)
 {
-  IO::GetParam<${MODEL_TYPE}*>(paramName) = (${MODEL_TYPE}*) ptr;
-  IO::SetPassed(paramName);
+  util::Params* p = (util::Params*) params;
+  p->Get<${MODEL_TYPE}*>(paramName) = (${MODEL_TYPE}*) ptr;
+  p->SetPassed(paramName);
+}
+
+// Delete a ${MODEL_TYPE} pointer.
+void Delete${MODEL_SAFE_TYPE}Ptr(void* ptr)
+{
+  ${MODEL_TYPE}* modelPtr = (${MODEL_TYPE}*) ptr;
+  delete modelPtr;
 }
 
 // Serialize a ${MODEL_TYPE} pointer.
@@ -55,9 +70,9 @@ char* Serialize${MODEL_SAFE_TYPE}Ptr(voi
 {
   std::ostringstream oss;
   {
-    boost::archive::binary_oarchive oa(oss);
+    cereal::BinaryOutputArchive oa(oss);
     ${MODEL_TYPE}* model = (${MODEL_TYPE}*) ptr;
-    oa << boost::serialization::make_nvp(\"${MODEL_SAFE_TYPE}\", model);
+    oa(CEREAL_POINTER(model));
   }
 
   *length = oss.str().length();
@@ -72,16 +87,16 @@ char* Serialize${MODEL_SAFE_TYPE}Ptr(voi
 // Deserialize a ${MODEL_TYPE} pointer.
 void* Deserialize${MODEL_SAFE_TYPE}Ptr(const char* buffer, const size_t length)
 {
-  ${MODEL_TYPE}* t = new ${MODEL_TYPE}();
+  ${MODEL_TYPE}* model = new ${MODEL_TYPE}();
 
   std::istringstream iss(std::string(buffer, length));
   {
-    boost::archive::binary_iarchive ia(iss);
-    ia >> boost::serialization::make_nvp(\"${MODEL_SAFE_TYPE}\", t);
+    cereal::BinaryInputArchive ia(iss);
+    ia(CEREAL_POINTER(model));
   }
 
   // Julia will be responsible for freeing this.
-  return (void*) t;
+  return (void*) model;
 }
 ")
   endforeach ()
diff -pruN 3.4.2-7/CMake/mlpack_coverage.in 4.0.1-1/CMake/mlpack_coverage.in
--- 3.4.2-7/CMake/mlpack_coverage.in	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/mlpack_coverage.in	1970-01-01 00:00:00.000000000 +0000
@@ -1,135 +0,0 @@
-#!/bin/bash
-# This script gets the test coverage for mlpack_test.
-test_case="ALL"
-gcov_loc=""
-token=""
-clean=true
-current_log_file=`date +'%Y.%h.%d:%H:%M:%S-coverage.log'`
-current_coverage_file=`date +'%Y.%h.%d:%H:%M:%S-coverage.info'`
-max_cov_count=50000
-
-# default directories
-root_dir="../"
-
-# Extract arguments.
-for i in "$@"
-do
-case $i in
-    -h|--help)
-    echo "Usage: mlpack_coverage --help|-h"
-    echo "       mlpack_coverage [-r=test_suite] [-g=gcov_tool_location]"
-    echo "                       [--token=coveralls_token]"
-    echo "Optional parameters:"
-    echo "    -n|--no_test               Do not run test before coverage computation"
-    echo "    -r|--run_test              Run tests with specific test suite"
-    echo "       --no_clean              Do not remove existing gcda file"
-    echo "    -g|--gcov_tool_location    Gcov location if not default"
-    echo "    -t|--token                 Upload to coveralls with given token"
-    echo "       --max_cov_count         Max line coverage count (default 50000)"
-    echo "       --root_dir              Set the root directory from which gcov will be called. (default ../)"
-    exit 0
-    shift
-    ;;
-    -n|--no_test)
-    test_case=""
-    shift
-    ;;
-    -r=*|--run_test=*)
-    test_case="${i#*=}"
-    shift # past argument=value
-    ;;
-    --no_clean)
-    clean=false
-    shift
-    ;;
-    -g=*|--gcov_tool_location=*)
-    gcov_loc="${i#*=}"
-    shift # past argument=value
-    ;;
-    -t=*|--token=*)
-    token="${i#*=}"
-    shift # past argument=value
-    ;;
-    --max_cov_count)
-    max_cov_count="${i#*=}"
-    shift
-    ;;
-    --root_dir=*)
-    root_dir="${i#*=}"
-    shift
-    ;;
-    *)
-            # unknown option
-    ;;
-esac
-done
-
-if [ "$clean" = true ]; then
-  echo "Deleting existing coverage data..."
-  find ./ -name "*.gcda" -type f -delete
-fi
-
-# Initial pass.
-echo "Generating primary coverage report."
-[[ -d ./coveragehistory/ ]] || mkdir coveragehistory
-lcov -b . -c -i -d ./ -o .coverage.wtest.base > ./coveragehistory/$current_log_file
-
-# Run the tests.
-if [ "$test_case" = "ALL" ]; then
-  echo "Running all the tests..."
-  "@CMAKE_BINARY_DIR@"/bin/mlpack_test
-elif ! [ "$test_case" = "" ]; then
-  echo "Running test suite: $test_case"
-  "@CMAKE_BINARY_DIR@"/bin/mlpack_test --run_test=$test_case
-fi
-
-# Generate coverage based on executed tests.
-echo "Computing coverage..."
-if [ "$gcov_loc" = "" ];
-then lcov -b . -c -d ./ -o .coverage.wtest.run >> ./coveragehistory/$current_log_file
-else
-  lcov -b . -c -d ./ -o .coverage.wtest.run --gcov-tool=$gcov_loc >> ./coveragehistory/$current_log_file
-fi
-
-echo "Filtering coverage files..."
-# Clear negative entries in coverage file
-sed -E "s/-([0-9]+)/$max_cov_count/g" -i .coverage.wtest.run
-# Merge coverage tracefiles.
-lcov -a .coverage.wtest.base -a .coverage.wtest.run  -o .coverage.total >> ./coveragehistory/$current_log_file
-
-# Filtering, extracting project files.
-lcov -e .coverage.total "@CMAKE_CURRENT_SOURCE_DIR@/src/mlpack/*" -o .coverage.total.filtered >> ./coveragehistory/$current_log_file
-
-# Filtering, removing test-files and main.cpp.
-lcov -r .coverage.total.filtered "@CMAKE_CURRENT_SOURCE_DIR@/src/mlpack/*/*_main.cpp" -o .coverage.total.filtered >> ./coveragehistory/$current_log_file
-lcov -r .coverage.total.filtered "@CMAKE_CURRENT_SOURCE_DIR@/src/mlpack/tests/*" -o .coverage.total.filtered >> ./coveragehistory/$current_log_file
-
-# Remove untestable files.
-lcov -r .coverage.total.filtered "@CMAKE_CURRENT_SOURCE_DIR@/src/mlpack/core/util/gitversion.hpp" -o .coverage.total.filtered >> ./coveragehistory/$current_log_file
-lcov -r .coverage.total.filtered "@CMAKE_CURRENT_SOURCE_DIR@/src/mlpack/core/util/arma_config.hpp" -o .coverage.total.filtered >> ./coveragehistory/$current_log_file
-
-# Extra:  Replace /build/ with /src/ to unify directories.
-cat .coverage.total.filtered > .coverage.total
-
-# Extra: Clear up previous data, create html folder.
-if [[ -d ./coverage/ ]] ; then
-    rm -rf ./coverage/*
-else
-    mkdir coverage
-fi
-
-# Step 9: Generate webpage.
-genhtml -o ./coverage/ .coverage.total
-
-# Extra: Preserve coverage file in coveragehistory folder.
-coverage_file=$current_coverage_file
-cp .coverage.total ./coveragehistory/$current_coverage_file
-
-# Clean temporary coverage files.
-#rm .coverage.*
-
-# Upload the result to coveralls if token is provided.
-if ! [ "$token" = "" ]; then
-  cpp-coveralls -n -r $root_dir -b $root_dir -l ./coveragehistory/$current_coverage_file -t "$token" --max-cov-count $max_cov_count
-fi
-
diff -pruN 3.4.2-7/CMake/R/ConfigureRCPP.cmake 4.0.1-1/CMake/R/ConfigureRCPP.cmake
--- 3.4.2-7/CMake/R/ConfigureRCPP.cmake	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMake/R/ConfigureRCPP.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -35,17 +35,33 @@ if (NOT (MODEL_FILE_TYPE MATCHES "\"${MO
       set(MODEL_PTR_IMPLS "${MODEL_PTR_IMPLS}
 // Get the pointer to a ${MODEL_TYPE} parameter.
 // [[Rcpp::export]]
-SEXP IO_GetParam${MODEL_SAFE_TYPE}Ptr(const std::string& paramName)
+SEXP GetParam${MODEL_SAFE_TYPE}Ptr(SEXP params,
+                                   const std::string& paramName,
+                                   SEXP inputModels)
 {
-  return std::move((${MODEL_PTR_TYPEDEF}) IO::GetParam<${MODEL_TYPE}*>(paramName));
+  util::Params& p = *Rcpp::as<Rcpp::XPtr<util::Params>>(params);
+  Rcpp::List inputModelsList(inputModels);
+  ${MODEL_TYPE}* modelPtr = p.Get<${MODEL_TYPE}*>(paramName);
+  for (int i = 0; i < inputModelsList.length(); ++i)
+  {
+    ${MODEL_PTR_TYPEDEF} inputModel =
+        Rcpp::as<${MODEL_PTR_TYPEDEF}>(inputModelsList[i]);
+    // Don't create a new XPtr---just reuse the one given as input, so that we
+    // don't end up deleting it twice.
+    if (inputModel.get() == modelPtr)
+      return inputModel;
+  }
+
+  return std::move((${MODEL_PTR_TYPEDEF}) p.Get<${MODEL_TYPE}*>(paramName));
 }
 
 // Set the pointer to a ${MODEL_TYPE} parameter.
 // [[Rcpp::export]]
-void IO_SetParam${MODEL_SAFE_TYPE}Ptr(const std::string& paramName, SEXP ptr)
+void SetParam${MODEL_SAFE_TYPE}Ptr(SEXP params, const std::string& paramName, SEXP ptr)
 {
-  IO::GetParam<${MODEL_TYPE}*>(paramName) =  Rcpp::as<${MODEL_PTR_TYPEDEF}>(ptr);
-  IO::SetPassed(paramName);
+  util::Params& p = *Rcpp::as<Rcpp::XPtr<util::Params>>(params);
+  p.Get<${MODEL_TYPE}*>(paramName) = Rcpp::as<${MODEL_PTR_TYPEDEF}>(ptr);
+  p.SetPassed(paramName);
 }
 
 // Serialize a ${MODEL_TYPE} pointer.
@@ -54,9 +70,9 @@ Rcpp::RawVector Serialize${MODEL_SAFE_TY
 {
   std::ostringstream oss;
   {
-    boost::archive::binary_oarchive oa(oss);
-    oa << boost::serialization::make_nvp(\"${MODEL_SAFE_TYPE}\",
-          *Rcpp::as<${MODEL_PTR_TYPEDEF}>(ptr));
+    cereal::BinaryOutputArchive oa(oss);
+    oa(cereal::make_nvp(\"${MODEL_SAFE_TYPE}\",
+          *Rcpp::as<${MODEL_PTR_TYPEDEF}>(ptr)));
   }
 
   Rcpp::RawVector raw_vec(oss.str().size());
@@ -76,8 +92,8 @@ SEXP Deserialize${MODEL_SAFE_TYPE}Ptr(Rc
 
   std::istringstream iss(std::string((char *) &str[0], str.size()));
   {
-    boost::archive::binary_iarchive ia(iss);
-    ia >> boost::serialization::make_nvp(\"${MODEL_SAFE_TYPE}\", *ptr);
+    cereal::BinaryInputArchive ia(iss);
+    ia(cereal::make_nvp(\"${MODEL_SAFE_TYPE}\", *ptr));
   }
 
   // R will be responsible for freeing this.
diff -pruN 3.4.2-7/CMake/stb/a.cpp 4.0.1-1/CMake/stb/a.cpp
--- 3.4.2-7/CMake/stb/a.cpp	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/stb/a.cpp	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,15 @@
+#include "a.hpp"
+
+// Include the static implementation of all STB functions.
+#define STB_IMAGE_STATIC
+#define STB_IMAGE_IMPLEMENTATION
+#define STB_IMAGE_WRITE_STATIC
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+
+#include <stb_image.h>
+#include <stb_image_write.h>
+
+void A::A()
+{
+  // Do nothing, just to check if the STB library is a working version.
+}
diff -pruN 3.4.2-7/CMake/stb/a.hpp 4.0.1-1/CMake/stb/a.hpp
--- 3.4.2-7/CMake/stb/a.hpp	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/stb/a.hpp	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,10 @@
+#ifndef A_HPP
+#define A_HPP
+
+namespace A {
+
+void A();
+
+}
+
+#endif
diff -pruN 3.4.2-7/CMake/stb/b.cpp 4.0.1-1/CMake/stb/b.cpp
--- 3.4.2-7/CMake/stb/b.cpp	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/stb/b.cpp	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,15 @@
+#include "b.hpp"
+
+// Include the static implementation of all STB functions.
+#define STB_IMAGE_STATIC
+#define STB_IMAGE_IMPLEMENTATION
+#define STB_IMAGE_WRITE_STATIC
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+
+#include <stb_image.h>
+#include <stb_image_write.h>
+
+void B::B()
+{
+  // Do nothing, just to check if the STB library is a working version.
+}
diff -pruN 3.4.2-7/CMake/stb/b.hpp 4.0.1-1/CMake/stb/b.hpp
--- 3.4.2-7/CMake/stb/b.hpp	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/stb/b.hpp	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,10 @@
+#ifndef B_HPP
+#define B_HPP
+
+namespace B {
+
+void B();
+
+}
+
+#endif
diff -pruN 3.4.2-7/CMake/stb/main.cpp 4.0.1-1/CMake/stb/main.cpp
--- 3.4.2-7/CMake/stb/main.cpp	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/stb/main.cpp	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,16 @@
+// The purpose of this file is to include STB's implementation in two separate
+// translation units.  One is a.cpp, and one is b.cpp.  This file simply
+// includes both of those, so that when we get to the linking phase, we will
+// have to link both translation units.
+//
+// Some versions of STB fail to correctly define some functions as
+// static---which will cause a linking failure.  Thus, if this fails to
+// compile, then mlpack's use of STB will fail.
+#include "a.hpp"
+#include "b.hpp"
+
+int main()
+{
+  A::A();
+  B::B();
+}
diff -pruN 3.4.2-7/CMake/TestError.cmake 4.0.1-1/CMake/TestError.cmake
--- 3.4.2-7/CMake/TestError.cmake	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/TestError.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,3 @@
+# A very simple script to issue an error if the mlpack_test target is not
+# defined.
+message(FATAL_ERROR "To build the mlpack_test target, reconfigure CMake with the BUILD_TESTS option set to ON!  (i.e. `cmake -DBUILD_TESTS=ON ../`)")
diff -pruN 3.4.2-7/CMake/TestStaticSTB.cmake 4.0.1-1/CMake/TestStaticSTB.cmake
--- 3.4.2-7/CMake/TestStaticSTB.cmake	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/CMake/TestStaticSTB.cmake	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,42 @@
+# Author: Omar Shrit
+
+#[=======================================================================[.rst:
+TestForSTB
+----------
+
+Test to verify if the available version of STB contains a working static
+implementation that can be used from multiple translation units.
+
+::
+
+    CMAKE_HAS_WORKING_STATIC_STB - defined by the results
+#]=======================================================================]
+
+if(NOT DEFINED CMAKE_HAS_WORKING_STATIC_STB)
+  message(STATUS "Check that STB static implementation mode links correctly...")
+  try_compile(CMAKE_HAS_WORKING_STATIC_STB
+      ${CMAKE_BINARY_DIR}/CMakeFiles/CMakeTmp/
+      SOURCES
+        ${CMAKE_SOURCE_DIR}/CMake/stb/main.cpp
+        ${CMAKE_SOURCE_DIR}/CMake/stb/a.cpp
+        ${CMAKE_SOURCE_DIR}/CMake/stb/b.cpp
+      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${STB_IMAGE_INCLUDE_DIR}"
+      OUTPUT_VARIABLE out)
+  if (CMAKE_HAS_WORKING_STATIC_STB)
+    message(STATUS "Check that STB static implementation mode links "
+        "correctly... success")
+    set(CMAKE_HAS_WORKING_STATIC_STB 1 CACHE INTERNAL
+	"Does STB static implementation mode link correctly")
+    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+        "Determining if STB's static implementation can link correctly passed "
+        "with the following output:\n${out}\n\n")
+  else ()
+    message(STATUS "Check that STB static implementation mode links "
+        "correctly... fail")
+    set(CMAKE_HAS_WORKING_STATIC_STB 0 CACHE INTERNAL
+        "Does STB static implementation mode link correctly")
+    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
+        "Determining if STB's static implementation can link correctly failed "
+        "with the following output:\n${out}\n\n")
+  endif ()
+endif()
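
The if(NOT DEFINED ...) wrapper together with CACHE INTERNAL is what makes
this check run only once per build directory; later configures reuse the
cached value.  The same idiom in miniature (hypothetical check name; usable
from any CMakeLists.txt):

    if (NOT DEFINED DEMO_CHECK_RESULT)
      # ...an expensive try_compile() or execute_process() would go here...
      set(DEMO_CHECK_RESULT 1 CACHE INTERNAL "Result of the demo check")
      message(STATUS "Demo check ran; result cached.")
    else ()
      message(STATUS "Reusing cached DEMO_CHECK_RESULT=${DEMO_CHECK_RESULT}.")
    endif ()
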
diff -pruN 3.4.2-7/CMakeLists.txt 4.0.1-1/CMakeLists.txt
--- 3.4.2-7/CMakeLists.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/CMakeLists.txt	2022-12-29 15:40:18.000000000 +0000
@@ -1,37 +1,60 @@
-cmake_minimum_required(VERSION 3.3.2)
+cmake_minimum_required(VERSION 3.6)
 project(mlpack C CXX)
 
 include(CMake/cotire.cmake)
 include(CMake/CheckHash.cmake)
+include(CMake/Autodownload.cmake)
+include(CMake/ConfigureCrossCompile.cmake)
+include(CMake/CheckAtomic.cmake)
 
 # First, define all the compilation options.
 # We default to debugging mode for developers.
 option(DEBUG "Compile with debugging information." OFF)
 option(PROFILE "Compile with profiling information." OFF)
 option(ARMA_EXTRA_DEBUG "Compile with extra Armadillo debugging symbols." OFF)
-option(MATLAB_BINDINGS "Compile MATLAB bindings if MATLAB is found." OFF)
 option(TEST_VERBOSE "Run test cases with verbose output." OFF)
-option(BUILD_TESTS "Build tests." ON)
+option(BUILD_TESTS "Build tests. (Note: time consuming!)" OFF)
 option(BUILD_CLI_EXECUTABLES "Build command-line executables." ON)
-option(DISABLE_DOWNLOADS "Disable downloads of dependencies during build." OFF)
-option(DOWNLOAD_ENSMALLEN "If ensmallen is not found, download it." ON)
-option(DOWNLOAD_STB_IMAGE "Download stb_image for image loading." ON)
+option(DOWNLOAD_DEPENDENCIES "Automatically download dependencies if not available." OFF)
 option(BUILD_GO_SHLIB "Build Go shared library." OFF)
 
-# Set minimum library version required by mlpack.
-set(ARMADILLO_VERSION "8.400.0")
+# Set minimum library versions required by mlpack.
+#
+# For Armadillo, try to keep the minimum required version less than or equal to
+# what's available on the current Ubuntu LTS or most recent stable RHEL release.
+# See https://github.com/mlpack/mlpack/issues/3033 for some more discussion.
+set(ARMADILLO_VERSION "9.800")
 set(ENSMALLEN_VERSION "2.10.0")
-set(BOOST_VERSION "1.58")
+set(CEREAL_VERSION "1.1.2")
 
+# If BUILD_SHARED_LIBS is OFF then the mlpack library will be built statically.
+# In addition, all mlpack CLI bindings will be linked statically as well.
 if (WIN32)
   option(BUILD_SHARED_LIBS
-      "Compile shared libraries (if OFF, static libraries are compiled)." OFF)
+      "Compile shared objects for tests and bindings (if OFF, static libraries and binaries are compiled)." OFF)
 
   set(DLL_COPY_DIRS "" CACHE STRING "List of directories (separated by ';') containing DLLs to copy for runtime.")
   set(DLL_COPY_LIBS "" CACHE STRING "List of DLLs (separated by ';') that should be copied for runtime.")
-else ()
+elseif(CMAKE_CROSSCOMPILING)
   option(BUILD_SHARED_LIBS
-      "Compile shared libraries (if OFF, static libraries are compiled)." ON)
+      "Compile shared libraries (if OFF, static libraries and binaries are compiled)." OFF)
+else()
+  option(BUILD_SHARED_LIBS
+      "Compile shared objects for tests and bindings (if OFF, static libraries and binaries are compiled)." ON)
+endif()
+
+# Enable auto-download if we are cross compiling.
+if (CMAKE_CROSSCOMPILING)
+  set(DOWNLOAD_DEPENDENCIES ON)
+endif()
+
+# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES.
+if (NOT BUILD_SHARED_LIBS)
+  if(WIN32)
+    list(INSERT CMAKE_FIND_LIBRARY_SUFFIXES 0 .lib .a)
+  else()
+    set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
+  endif()
 endif()
 
 # Detect whether the user passed BUILD_PYTHON_BINDINGS in order to determine if
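
The CMAKE_FIND_LIBRARY_SUFFIXES adjustment above is what steers every later
find_library() call toward static archives.  A sketch runnable with
`cmake -P` on a Unix-like system (whether libm.a is found depends on what is
installed locally):

    set(BUILD_SHARED_LIBS OFF)
    if (NOT BUILD_SHARED_LIBS)
      if (WIN32)
        # Prefer import/static libraries but still allow other suffixes.
        list(INSERT CMAKE_FIND_LIBRARY_SUFFIXES 0 .lib .a)
      else()
        # Accept static archives only.
        set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
      endif()
    endif()

    find_library(M_LIBRARY m)
    message(STATUS "static m library: ${M_LIBRARY}")
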
@@ -50,7 +73,7 @@ if (BUILD_JULIA_BINDINGS)
 else()
   set(FORCE_BUILD_JULIA_BINDINGS OFF)
 endif()
-option(BUILD_JULIA_BINDINGS "Build Julia bindings." ON)
+option(BUILD_JULIA_BINDINGS "Build Julia bindings." OFF)
 
 # Detect whether the user passed BUILD_GO_BINDINGS in order to determine if
 # we should fail if Go isn't found.
@@ -59,7 +82,7 @@ if (BUILD_GO_BINDINGS)
 else()
   set(FORCE_BUILD_GO_BINDINGS OFF)
 endif()
-option(BUILD_GO_BINDINGS "Build Go bindings." ON)
+option(BUILD_GO_BINDINGS "Build Go bindings." OFF)
 
 # If building Go bindings then build go shared libraries.
 if (BUILD_GO_BINDINGS)
@@ -73,36 +96,31 @@ if (BUILD_R_BINDINGS)
 else()
   set(FORCE_BUILD_R_BINDINGS OFF)
 endif()
-option(BUILD_R_BINDINGS "Build R bindings." ON)
+option(BUILD_R_BINDINGS "Build R bindings." OFF)
 # Build Markdown bindings for documentation.  This is used as part of website
 # generation.
 option(BUILD_MARKDOWN_BINDINGS "Build Markdown bindings for website documentation." OFF)
 
-option(BUILD_WITH_COVERAGE
-    "Build with support for code coverage tools (gcc only)." OFF)
 option(MATHJAX
     "Use MathJax for HTML Doxygen output (disabled by default)." OFF)
-option(FORCE_CXX11
-    "Don't check that the compiler supports C++11, just assume it.  Make sure to specify any necessary flag to enable C++11 as part of CXXFLAGS." OFF)
 option(USE_OPENMP "If available, use OpenMP for parallelization." ON)
 enable_testing()
 
-# Set required standard to C++11.
-set(CMAKE_CXX_STANDARD 11)
+# Set required standard to C++14.
+set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
-# Include modules in the CMake directory.
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/CMake")
-
-# Disable any downloads if needed.
-if (DISABLE_DOWNLOADS)
-  set(DOWNLOAD_ENSMALLEN OFF)
-  set(DOWNLOAD_STB_IMAGE OFF)
+# Ensure that GCC is new enough, if the compiler is GCC.
+if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5)
+  message(FATAL_ERROR "GCC version (${CMAKE_CXX_COMPILER_VERSION}) is too old! 5.x or newer is required.")
 endif ()
 
-# If we are on a Unix-like system, use the GNU install directories module.
+# Include modules in the CMake directory.
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/CMake")
+
+# If we are not using Visual Studio, use the GNU install directories module.
 # Otherwise set the values manually.
-if (UNIX)
+if (NOT MSVC)
   include(GNUInstallDirs)
 else ()
   set(CMAKE_INSTALL_BINDIR ${CMAKE_INSTALL_PREFIX}/bin)
@@ -113,12 +131,12 @@ else ()
 endif ()
 
 # This is as yet unused.
-#option(PGO "Use profile-guided optimization if not a debug build" ON)
+# option(PGO "Use profile-guided optimization if not a debug build" ON)
 
 # Set the CFLAGS and CXXFLAGS depending on the options the user specified.
 # Only GCC-like compilers support -Wextra, and other compilers give tons of
 # output for -Wall, so only -Wall and -Wextra on GCC.
-if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+if (CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
   # Ensure that we can't compile with clang 3.4, since this causes strange
   # issues.
   if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.5)
@@ -134,40 +152,51 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function")
 endif()
 
-# These support libraries are used if we need to link against something
-# specific.  This list is a subset of MLPACK_LIBRARIES.
-set(COMPILER_SUPPORT_LIBRARIES "")
+# Check if atomics need -latomic linking.
+#include(CheckAtomic)
+if (NOT HAVE_CXX_ATOMICS_WITHOUT_LIB AND
+    NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB AND
+    NOT MSVC)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -latomic")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -latomic")
+endif ()
 
 # If we are using MSVC, we need /bigobj.
 if (MSVC)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
+  set(CMAKE_CXX_STANDARD 17)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj /Zm200 /Zc:__cplusplus")
 endif ()
 
 # If we are using MINGW, we need sections and big-obj, otherwise we create too
 # many sections.
-if(CMAKE_COMPILER_IS_GNUCC AND WIN32)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffunction-sections -fdata-sections -Wa,-mbig-obj")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffunction-sections -fdata-sections -Wa,-mbig-obj")
+if (CMAKE_COMPILER_IS_GNUCC AND WIN32)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wa,-mbig-obj")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wa,-mbig-obj")
 endif()
 
 # If using clang, we have to link against libc++ depending on the
 # OS (at least on some systems). Further, gcc sometimes optimizes calls to
 # math.h functions, making -lm unnecessary with gcc, but it may still be
 # necessary with clang.
-if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
   if (APPLE)
     # Detect OS X version. Use '/usr/bin/sw_vers -productVersion' to
     # extract V from '10.V.x'.
     exec_program(/usr/bin/sw_vers ARGS
         -productVersion OUTPUT_VARIABLE MACOSX_VERSION_RAW)
     string(REGEX REPLACE
-        "10\\.([0-9]+).*" "\\1"
-        MACOSX_VERSION
+        "([0-9]+)(\\.([0-9]+).*)*" "\\1"
+        MACOSX_MAJOR_VERSION
+        "${MACOSX_VERSION_RAW}")
+
+    string(REGEX REPLACE
+        "([0-9]+)(\\.([0-9]+).*)*" "\\3"
+        MACOSX_MINOR_VERSION
         "${MACOSX_VERSION_RAW}")
 
     # OSX Lion (10.7) and OS X Mountain Lion (10.8) don't automatically
      # select the right stdlib.
-    if(${MACOSX_VERSION} LESS 9)
+    if (${MACOSX_MAJOR_VERSION} LESS 11 AND ${MACOSX_MINOR_VERSION} LESS 9)
       set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++")
       set(CMAKE_SHARED_LINKER_FLAGS
           "${CMAKE_SHARED_LINKER_FLAGS} -stdlib=libc++")
@@ -178,7 +207,6 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "
   endif()
 
   # Link everything with -lm.
-  set(COMPILER_SUPPORT_LIBRARIES ${COMPILER_SUPPORT_LIBRARIES} "m")
   set(MLPACK_LIBRARIES ${MLPACK_LIBRARIES} "m")
   # Use -pthread, but not on OS X.
   if (NOT APPLE)
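
The two-step version split above can be checked on its own; the sample value
mimics `sw_vers -productVersion` output (a sketch runnable with `cmake -P`):

    set(MACOSX_VERSION_RAW "10.14.6")

    string(REGEX REPLACE "([0-9]+)(\\.([0-9]+).*)*" "\\1"
        MACOSX_MAJOR_VERSION "${MACOSX_VERSION_RAW}")
    string(REGEX REPLACE "([0-9]+)(\\.([0-9]+).*)*" "\\3"
        MACOSX_MINOR_VERSION "${MACOSX_VERSION_RAW}")

    # Prints "major=10 minor=14"; "11.1" would give major=11 minor=1.
    message(STATUS "major=${MACOSX_MAJOR_VERSION} minor=${MACOSX_MINOR_VERSION}")
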
@@ -188,46 +216,13 @@ endif()
 
 # If we're using gcc, then we need to link against pthreads to use std::thread,
 # which we do in the tests.
-if(CMAKE_COMPILER_IS_GNUCC)
+if (CMAKE_COMPILER_IS_GNUCC)
   find_package(Threads)
-  set(COMPILER_SUPPORT_LIBRARIES ${COMPILER_SUPPORT_LIBRARIES}
-      ${CMAKE_THREAD_LIBS_INIT})
-endif()
-
-# Setup build for test coverage
-if(BUILD_WITH_COVERAGE)
-  # Currently coverage only works with GNU g++.
-  if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-    # Find gcov and lcov
-    find_program(GCOV gcov)
-    find_program(LCOV lcov)
-
-    if(NOT GCOV)
-      message(FATAL_ERROR
-          "gcov not found! gcov is required when BUILD_WITH_COVERAGE=ON.")
-    endif()
-
-    set(MLPACK_LIBRARIES ${MLPACK_LIBRARIES} "supc++")
-    set(MLPACK_LIBRARIES ${MLPACK_LIBRARIES} "quadmath")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage -fno-inline -fno-inline-small-functions -fno-default-inline -fprofile-arcs -fkeep-inline-functions")
-    message(STATUS "Adding debug compile options for code coverage.")
-    # Remove optimizations for better line coverage
-    set(DEBUG ON)
-
-    if(LCOV)
-      configure_file(CMake/mlpack_coverage.in mlpack_coverage @ONLY)
-      add_custom_target(mlpack_coverage DEPENDS mlpack_test COMMAND ${PROJECT_BINARY_DIR}/mlpack_coverage)
-    else()
-      message(WARNING "'lcov' not found; local coverage report is disabled. "
-          "Install 'lcov' and rerun cmake to generate local coverage report.")
-    endif()
-  else()
-    message(FATAL_ERROR "BUILD_WITH_COVERAGE can only work with GNU environment.")
-  endif()
+  set(MLPACK_LIBRARIES ${MLPACK_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
 endif()
 
 # Debugging CFLAGS.  Turn optimizations off; turn debugging symbols on.
-if(DEBUG)
+if (DEBUG)
   if (NOT MSVC)
     add_definitions(-DDEBUG)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -ftemplate-backtrace-limit=0")
@@ -236,22 +231,21 @@ if(DEBUG)
 
   # mlpack uses its own mlpack::backtrace class, based on the Binary File
   # Descriptor library <bfd.h> and the Linux dynamic loader <libdl.h>; a more
   # portable version may come in the future.
-  if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+  if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
     find_package(Bfd)
     find_package(LibDL)
-    if(LIBBFD_FOUND AND LIBDL_FOUND)
+    if (LIBBFD_FOUND AND LIBDL_FOUND)
       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic")
       set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS} ${LIBBFD_INCLUDE_DIRS}
           ${LIBDL_INCLUDE_DIRS})
       set(MLPACK_LIBRARIES ${MLPACK_LIBRARIES} ${LIBBFD_LIBRARIES}
           ${LIBDL_LIBRARIES})
-      add_definitions(-DHAS_BFD_DL)
+      add_definitions(-DMLPACK_HAS_BFD_DL)
     else()
       message(WARNING "No libBFD and/or libDL has been found!")
     endif()
   endif()
 else()
-  add_definitions(-DARMA_NO_DEBUG)
   add_definitions(-DNDEBUG)
   if (NOT MSVC)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
@@ -263,19 +257,14 @@ else()
 endif()
 
 # Profiling CFLAGS.  Turn profiling information on.
-if(CMAKE_COMPILER_IS_GNUCC AND PROFILE)
+if (CMAKE_COMPILER_IS_GNUCC AND PROFILE)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg")
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pg")
   set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pg")
 endif()
 
-# If the user asked for running test cases with verbose output, turn that on.
-if(TEST_VERBOSE)
-  add_definitions(-DTEST_VERBOSE)
-endif()
-
 # If the user asked for extra Armadillo debugging output, turn that on.
-if(ARMA_EXTRA_DEBUG)
+if (ARMA_EXTRA_DEBUG)
   add_definitions(-DARMA_EXTRA_DEBUG)
 endif()
 
@@ -285,206 +274,102 @@ endif()
 #   ARMADILLO_LIBRARY - location of libarmadillo.so / armadillo.lib
 #   ARMADILLO_INCLUDE_DIR - directory containing <armadillo>
 #   ARMADILLO_INCLUDE_DIRS - directories necessary for Armadillo includes
-#   BOOST_ROOT - root of Boost installation
-#   BOOST_INCLUDEDIR - include directory for Boost
-#   BOOST_LIBRARYDIR - library directory for Boost
+#   CEREAL_INCLUDE_DIR - include directory for cereal
 #   ENSMALLEN_INCLUDE_DIR - include directory for ensmallen
 #   STB_IMAGE_INCLUDE_DIR - include directory for STB image library
 #   MATHJAX_ROOT - root of MathJax installation
-find_package(Armadillo "${ARMADILLO_VERSION}" REQUIRED)
 
+# Download and compile OpenBLAS if we are cross compiling mlpack for a specific
+# architecture. The function takes the version of OpenBLAS as variable.
+if (CMAKE_CROSSCOMPILING)
+  search_openblas(0.3.13)
+endif()
+
+if (NOT DOWNLOAD_DEPENDENCIES)
+  find_package(Armadillo "${ARMADILLO_VERSION}" REQUIRED)
+else()
+  find_package(Armadillo "${ARMADILLO_VERSION}")
+  if (NOT ARMADILLO_FOUND)
+    if (NOT CMAKE_CROSSCOMPILING)
+      find_package(BLAS QUIET)
+      find_package(LAPACK QUIET)
+      if (NOT BLAS_FOUND AND NOT LAPACK_FOUND)
+        message(FATAL_ERROR "Cannot find BLAS or LAPACK!  These are required for Armadillo.  Please install one of them---or install Armadillo---before installing mlpack.")
+      endif()
+    endif()
+    get_deps(http://files.mlpack.org/armadillo-10.3.0.tar.gz armadillo armadillo-10.3.0.tar.gz)
+    set(ARMADILLO_INCLUDE_DIR ${GENERIC_INCLUDE_DIR})
+    find_package(Armadillo REQUIRED)
+  endif()
+endif()
 # Include directories for the previous dependencies.
 set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS} ${ARMADILLO_INCLUDE_DIRS})
 set(MLPACK_LIBRARIES ${MLPACK_LIBRARIES} ${ARMADILLO_LIBRARIES})
 
 # Find stb_image.h and stb_image_write.h.
-find_package(StbImage)
-# Download stb_image for image loading.
-if (NOT STB_IMAGE_FOUND)
-  if (DOWNLOAD_STB_IMAGE)
-    set(STB_DIR "stb")
-    install(DIRECTORY DESTINATION "${CMAKE_BINARY_DIR}/deps/${STB_DIR}")
-    file(DOWNLOAD http://mlpack.org/files/stb-2.22/stb_image.h
-        "${CMAKE_BINARY_DIR}/deps/${STB_DIR}/stb_image.h"
-        STATUS STB_IMAGE_DOWNLOAD_STATUS_LIST LOG STB_IMAGE_DOWNLOAD_LOG
-        SHOW_PROGRESS)
-    list(GET STB_IMAGE_DOWNLOAD_STATUS_LIST 0 STB_IMAGE_DOWNLOAD_STATUS)
-    file(DOWNLOAD http://mlpack.org/files/stb-1.13/stb_image_write.h
-        "${CMAKE_BINARY_DIR}/deps/${STB_DIR}/stb_image_write.h"
-        STATUS STB_IMAGE_WRITE_DOWNLOAD_STATUS_LIST
-        LOG STB_IMAGE_WRITE_DOWNLOAD_LOG
-        SHOW_PROGRESS)
-    list(GET STB_IMAGE_WRITE_DOWNLOAD_STATUS_LIST 0
-        STB_IMAGE_WRITE_DOWNLOAD_STATUS)
-    if (STB_IMAGE_DOWNLOAD_STATUS EQUAL 0 AND
-        STB_IMAGE_WRITE_DOWNLOAD_STATUS EQUAL 0)
-      check_hash (http://mlpack.org/files/stb/hash.md5 "${CMAKE_BINARY_DIR}/deps/${STB_DIR}"
-          HASH_CHECK_FAIL)
-      if (HASH_CHECK_FAIL EQUAL 0)
-        set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS}
-            "${CMAKE_BINARY_DIR}/deps/${STB_DIR}/")
-        message(STATUS
-            "Successfully downloaded stb into ${CMAKE_BINARY_DIR}/deps/${STB_DIR}/")
-        # Now we have to also ensure these header files get installed.
-        install(FILES "${CMAKE_BINARY_DIR}/deps/${STB_DIR}/stb_image.h" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
-        install(FILES "${CMAKE_BINARY_DIR}/deps/${STB_DIR}/stb_image_write.h" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
-        add_definitions(-DHAS_STB)
-        set(STB_AVAILABLE "1")
-      else ()
-        message(WARNING
-            "stb/stb_image.h is not installed. Image utilities will not be available!")
-      endif ()
-    else ()
-        file(REMOVE_RECURSE "${CMAKE_BINARY_DIR}/deps/${STB_DIR}/")
-        list(GET STB_IMAGE_DOWNLOAD_STATUS_LIST 1 STB_DOWNLOAD_ERROR)
-        message(WARNING
-            "Could not download stb! Error code ${STB_DOWNLOAD_STATUS}: ${STB_DOWNLOAD_ERROR}!  Error log: ${STB_DOWNLOAD_LOG}")
-        message(WARNING
-            "stb/stb_image.h is not installed. Image utilities will not be available!")
-    endif ()
-  else ()
-    message(WARNING
-        "stb/stb_image.h is not installed. Image utilities will not be available!")
-  endif ()
-else ()
-  # Already has STB installed.
+if (NOT DOWNLOAD_DEPENDENCIES)
+  find_package(StbImage)
+else()
+  find_package(StbImage)
+  if (NOT STB_IMAGE_FOUND)
+    get_deps(http://mlpack.org/files/stb.tar.gz stb stb.tar.gz)
+    set(STB_IMAGE_INCLUDE_DIR ${GENERIC_INCLUDE_DIR})
+    find_package(StbImage REQUIRED)
+  endif()
+endif()
+
+if (STB_IMAGE_FOUND)
   add_definitions(-DHAS_STB)
-  set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS} ${STB_IMAGE_INCLUDE_DIR})
   set(STB_AVAILABLE "1")
-endif ()
+  set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS} "${STB_IMAGE_INCLUDE_DIR}")
 
-
-# Find ensmallen.
-# Once ensmallen is readily available in package repos, the automatic downloader
-# here can be removed.
-find_package(Ensmallen "${ENSMALLEN_VERSION}")
-if (NOT ENSMALLEN_FOUND)
-  if (DOWNLOAD_ENSMALLEN)
-    file(DOWNLOAD http://www.ensmallen.org/files/ensmallen-2.14.2.tar.gz
-        "${CMAKE_BINARY_DIR}/deps/ensmallen-2.14.2.tar.gz"
-        STATUS ENS_DOWNLOAD_STATUS_LIST LOG ENS_DOWNLOAD_LOG
-        SHOW_PROGRESS)
-    list(GET ENS_DOWNLOAD_STATUS_LIST 0 ENS_DOWNLOAD_STATUS)
-    if (ENS_DOWNLOAD_STATUS EQUAL 0)
-      execute_process(COMMAND ${CMAKE_COMMAND} -E
-          tar xzf "${CMAKE_BINARY_DIR}/deps/ensmallen-2.14.2.tar.gz"
-          WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/deps/")
-
-      # Get the name of the directory.
-      file (GLOB ENS_DIRECTORIES RELATIVE "${CMAKE_BINARY_DIR}/deps/"
-          "${CMAKE_BINARY_DIR}/deps/ensmallen-[0-9]*.[0-9]*.[0-9]*")
-      # list(FILTER) is not available on 3.5 or older, but try to keep
-      # configuring without filtering the list anyway (it might work if only
-      # the file ensmallen-2.14.2.tar.gz is present).
-      if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.6.0")
-        list(FILTER ENS_DIRECTORIES EXCLUDE REGEX "ensmallen-.*\.tar\.gz")
-      endif ()
-      list(LENGTH ENS_DIRECTORIES ENS_DIRECTORIES_LEN)
-      if (ENS_DIRECTORIES_LEN EQUAL 1)
-        list(GET ENS_DIRECTORIES 0 ENSMALLEN_INCLUDE_DIR)
-        set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS}
-            "${CMAKE_BINARY_DIR}/deps/${ENSMALLEN_INCLUDE_DIR}/include")
-        message(STATUS
-            "Successfully downloaded ensmallen into ${CMAKE_BINARY_DIR}/deps/${ENSMALLEN_INCLUDE_DIR}/")
-
-        # Now we have to also ensure these header files get installed.
-        install(DIRECTORY "${CMAKE_BINARY_DIR}/deps/${ENSMALLEN_INCLUDE_DIR}/include/ensmallen_bits/" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/ensmallen_bits")
-        install(FILES "${CMAKE_BINARY_DIR}/deps/${ENSMALLEN_INCLUDE_DIR}/include/ensmallen.hpp" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
-      else ()
-        message(FATAL_ERROR "Problem unpacking ensmallen!  Expected only one directory ensmallen-x.y.z/; found ${ENS_DIRECTORIES}. Try removing the directory ${CMAKE_BINARY_DIR}/deps and reconfiguring.")
-      endif ()
-    else ()
-      list(GET ENS_DOWNLOAD_STATUS_LIST 1 ENS_DOWNLOAD_ERROR)
-      message(FATAL_ERROR
-          "Could not download ensmallen! Error code ${ENS_DOWNLOAD_STATUS}: ${ENS_DOWNLOAD_ERROR}!  Error log: ${ENS_DOWNLOAD_LOG}")
-    endif ()
-  else ()
-    # Release versions will have ensmallen packaged with the release so we can
-    # just reference that.
-    if (EXISTS "${CMAKE_SOURCE_DIR}/src/mlpack/core/optimizers/ensmallen/ensmallen.hpp")
-      set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS} ${ARMADILLO_INCLUDE_DIRS}
-          "${CMAKE_SOURCE_DIR}/src/mlpack/core/optimizers/ensmallen")
-    else ()
-      message(FATAL_ERROR
-          "Cannot find ensmallen headers!  Try setting ENSMALLEN_INCLUDE_DIR!")
-    endif ()
+  # Make sure that we can link STB in multiple translation units.
+  include(CMake/TestStaticSTB.cmake)
+  if (NOT CMAKE_HAS_WORKING_STATIC_STB)
+    message(FATAL_ERROR "STB implementation's static mode cannot link across "
+        "multiple translation units!  Try upgrading your STB implementation, "
+        "or using the auto-downloader (set DOWNLOAD_DEPENDENCIES=ON in the "
+        "CMake configuration command).")
   endif ()
-else ()
-  set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS} "${ENSMALLEN_INCLUDE_DIR}")
-endif ()
+endif()
 
-# Unfortunately this configuration variable is necessary and will need to be
-# updated as time goes on and new versions are released.
-set(Boost_ADDITIONAL_VERSIONS
-  "1.74.0" "1.74"
-  "1.73.0" "1.73"
-  "1.72.0" "1.72"
-  "1.71.0" "1.71"
-  "1.70.0" "1.70"
-  "1.69.0" "1.69"
-  "1.68.0" "1.68"
-  "1.67.0" "1.67"
-  "1.66.0" "1.66"
-  "1.65.1" "1.65.0" "1.65"
-  "1.64.1" "1.64.0" "1.64"
-  "1.63.1" "1.63.0" "1.63"
-  "1.62.1" "1.62.0" "1.62"
-  "1.61.1" "1.61.0" "1.61"
-  "1.60.1" "1.60.0" "1.60"
-  "1.59.1" "1.59.0" "1.59"
-  "1.58.1" "1.58.0" "1.58")
-# Disable forced config-mode CMake search for Boost, which only imports targets
-# and does not set the variables that we need.
-#
-# TODO for the brave: transition all mlpack's CMake to 'target-based modern
-# CMake'.  Good luck!  You'll need it.
-set(Boost_NO_BOOST_CMAKE 1)
-find_package(Boost "${BOOST_VERSION}"
-    COMPONENTS
-      unit_test_framework
-      serialization
-    REQUIRED
-)
-
-link_directories(${Boost_LIBRARY_DIRS})
-
-# In Visual Studio, automatic linking is performed, so we don't need to worry
-# about it.  Clear the list of libraries to link against and let Visual Studio
-# handle it.
-if (MSVC)
-  link_directories(${Boost_LIBRARY_DIRS})
-  set(CMAKE_MSVCIDE_RUN_PATH ${CMAKE_MSVCIDE_RUN_PATH} ${Boost_LIBRARY_DIRS})
-  message("boost lib dirs ${Boost_LIBRARY_DIRS}")
-  set(Boost_LIBRARIES "")
-endif ()
+# Find ensmallen.
+if (NOT DOWNLOAD_DEPENDENCIES)
+  find_package(Ensmallen "${ENSMALLEN_VERSION}" REQUIRED)
+else()
+  find_package(Ensmallen "${ENSMALLEN_VERSION}")
+  if (NOT ENSMALLEN_FOUND)
+    get_deps(http://www.ensmallen.org/files/ensmallen-2.19.0.tar.gz ensmallen ensmallen-latest.tar.gz)
+    set(ENSMALLEN_INCLUDE_DIR ${GENERIC_INCLUDE_DIR})
+    find_package(Ensmallen REQUIRED)
+  endif()
+endif()
+set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS} "${ENSMALLEN_INCLUDE_DIR}")
 
-set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS})
-set(MLPACK_LIBRARIES ${MLPACK_LIBRARIES} ${Boost_LIBRARIES})
-set(MLPACK_LIBRARY_DIRS ${MLPACK_LIBRARY_DIRS} ${Boost_LIBRARY_DIRS})
-
-# For Boost testing framework (will have no effect on non-testing executables).
-# This specifies to Boost that we are dynamically linking to the Boost test
-# library.
-add_definitions(-DBOOST_TEST_DYN_LINK)
+# Find cereal.
+if (NOT DOWNLOAD_DEPENDENCIES)
+  find_package(cereal "${CEREAL_VERSION}" REQUIRED)
+else()
+  find_package(cereal "${CEREAL_VERSION}")
+  if (NOT CEREAL_FOUND)
+    get_deps(https://github.com/USCiLab/cereal/archive/refs/tags/v1.3.0.tar.gz cereal cereal-1.3.0.tar.gz)
+    set(CEREAL_INCLUDE_DIR ${GENERIC_INCLUDE_DIR})
+    find_package(cereal REQUIRED)
+  endif()
+endif()
+set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS} ${CEREAL_INCLUDE_DIR})
 
 # Detect OpenMP support in a compiler. If the compiler supports OpenMP, flags
-# to compile with OpenMP are returned and added and the HAS_OPENMP definition
-# is added for compilation.
-#
-# This way we can skip calls to functions defined in omp.h with code like:
-# #ifdef HAS_OPENMP
-# {
-#   ... openMP code here ...
-# }
-# #endif
+# to compile with OpenMP are returned and added.  Note that MSVC does not
+# support a new-enough version of OpenMP to be useful.
 if (USE_OPENMP)
   find_package(OpenMP)
 endif ()
 
-if (OPENMP_FOUND)
-  add_definitions(-DHAS_OPENMP)
+if (OpenMP_FOUND AND OpenMP_CXX_VERSION VERSION_GREATER_EQUAL 3.0.0)
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+  set(MLPACK_LIBRARIES ${MLPACK_LIBRARIES} ${OpenMP_CXX_LIBRARIES})
 else ()
   # Disable warnings for all the unknown OpenMP pragmas.
   if (NOT MSVC)
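
The OpenMP gate above can be reproduced stand-alone.  A minimal CMakeLists.txt
sketch, assuming CMake >= 3.9 (where FindOpenMP started reporting
OpenMP_CXX_VERSION and OpenMP_CXX_LIBRARIES):

    cmake_minimum_required(VERSION 3.9)
    project(omp_probe CXX)

    find_package(OpenMP)
    if (OpenMP_FOUND AND OpenMP_CXX_VERSION VERSION_GREATER_EQUAL 3.0.0)
      message(STATUS "OpenMP ${OpenMP_CXX_VERSION} available: ${OpenMP_CXX_FLAGS}")
    else ()
      message(STATUS "No usable OpenMP; OpenMP pragmas will be ignored.")
    endif ()
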
@@ -500,7 +385,7 @@ endif ()
 include(CMake/TargetDistclean.cmake OPTIONAL)
 
 include_directories(BEFORE ${MLPACK_INCLUDE_DIRS})
-include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/)
+include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src/)
 
 # On Windows, things end up under Debug/ or Release/.
 if (WIN32)
@@ -560,21 +445,6 @@ if (GIT_FOUND)
   endif ()
 endif ()
 
-# Create a target to generate arma_config.hpp, which is used to warn the user
-# when they are doing something stupid when linking something against mlpack.
-include(CMake/CreateArmaConfigInfo.cmake)
-
-add_custom_target(mlpack_arma_config ALL
-    COMMAND ${CMAKE_COMMAND}
-        -D ARMADILLO_INCLUDE_DIR="${ARMADILLO_INCLUDE_DIR}"
-        -D OPENMP_FOUND="${OPENMP_FOUND}"
-        -D CMAKE_SIZEOF_VOID_P="${CMAKE_SIZEOF_VOID_P}"
-        -P CMake/CreateArmaConfigInfo.cmake
-    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-    COMMENT "Updating arma_config.hpp (if necessary)")
-set(MLPACK_SRCS ${MLPACK_SRCS}
-    "${CMAKE_CURRENT_SOURCE_DIR}/src/mlpack/core/util/arma_config.hpp")
-
 # Make a target to generate the man page documentation, but only if we are on a
 # UNIX-like system.
 if (BUILD_CLI_EXECUTABLES AND UNIX)
@@ -600,60 +470,13 @@ if (BUILD_CLI_EXECUTABLES AND UNIX)
   endif ()
 endif ()
 
+# Finally, add any cross-compilation support libraries (they may need to come
+# last).  If we are not cross-compiling, no changes will happen here.
+set(MLPACK_LIBRARIES ${MLPACK_LIBRARIES} ${CROSS_COMPILE_SUPPORT_LIBRARIES})
+
 # Recurse into the rest of the project.
 add_subdirectory(src/mlpack)
 
-# If we need to keep gitversion.hpp up to date, then make sure the mlpack target
-# depends on it.
-if (USING_GIT STREQUAL "YES")
-  add_dependencies(mlpack_headers mlpack_gitversion)
-endif ()
-
-# Make the mlpack_arma_config target depend on mlpack (we couldn't do this
-# before the add_subdirectory() call because the mlpack target didn't exist
-# before that).
-add_dependencies(mlpack_headers mlpack_arma_config)
-
-# Make a target to generate the documentation.  If Doxygen isn't installed, then
-# I guess this option will just be unavailable.
-find_package(Doxygen)
-if (DOXYGEN_FOUND)
-  if (MATHJAX)
-    find_package(MathJax)
-    if (NOT MATHJAX_FOUND)
-      message(STATUS "Using MathJax at the MathJax Content Delivery Network. "
-          "Be careful, formulas will not be shown without the internet.")
-    endif ()
-  endif ()
-  # Preprocess the Doxyfile.  This is done before 'make doc'.
-  add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/Doxyfile
-      PRE_BUILD
-      COMMAND ${CMAKE_COMMAND}
-          -D DESTDIR=${CMAKE_BINARY_DIR}
-          -D MATHJAX="${MATHJAX}"
-          -D MATHJAX_FOUND="${MATHJAX_FOUND}"
-          -D MATHJAX_PATH="${MATHJAX_PATH}"
-          -P "${CMAKE_CURRENT_SOURCE_DIR}/CMake/GenerateDoxyfile.cmake"
-      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
-      DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile"
-      COMMENT "Creating Doxyfile to generate Doxygen documentation"
-  )
-
-  # Generate documentation.
-  add_custom_target(doc
-      COMMAND "${DOXYGEN_EXECUTABLE}" "${CMAKE_BINARY_DIR}/Doxyfile"
-      DEPENDS "${CMAKE_BINARY_DIR}/Doxyfile"
-      WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
-      COMMENT "Generating API documentation with Doxygen"
-  )
-
-  install(DIRECTORY "${CMAKE_BINARY_DIR}/doc/html"
-      DESTINATION "${CMAKE_INSTALL_DOCDIR}"
-      COMPONENT doc
-      OPTIONAL
-  )
-endif ()
-
 # Create the pkg-config file, if we have pkg-config.
 find_package(PkgConfig)
 if (PKG_CONFIG_FOUND)
@@ -693,13 +516,14 @@ if (PKG_CONFIG_FOUND)
 
       list(APPEND MLPACK_LIBRARIES_LIST "-L${library_dir}")
       list(APPEND MLPACK_LIBRARIES_LIST "-l${library_name}")
+    elseif ("${first}" STREQUAL "-")
+      # This argument is already in the right format.  (This happens with, e.g.,
+      # `-lpthread`.)
+      list(APPEND MLPACK_LIBRARIES_LIST "${lib}")
     else ()
       list(APPEND MLPACK_LIBRARIES_LIST "-l${lib}")
     endif ()
   endforeach ()
-  # Don't forget to add mlpack as a dependency too.
-  list(APPEND MLPACK_LIBRARIES_LIST "-L${CMAKE_INSTALL_PREFIX}/lib/")
-  list(APPEND MLPACK_LIBRARIES_LIST "-lmlpack")
 
   # Filter duplicate dependencies and directories.
   list(REMOVE_DUPLICATES MLPACK_LIBRARIES_LIST)
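
The new "-" branch makes the normalization idempotent: entries that already
look like linker flags pass through, and bare names gain an -l prefix.  A
reduced sketch (runnable with `cmake -P`; the two sample entries are
illustrative):

    set(MLPACK_LIBRARIES_LIST "")
    foreach(lib m -lpthread)
      string(SUBSTRING "${lib}" 0 1 first)
      if ("${first}" STREQUAL "-")
        list(APPEND MLPACK_LIBRARIES_LIST "${lib}")
      else ()
        list(APPEND MLPACK_LIBRARIES_LIST "-l${lib}")
      endif ()
    endforeach ()

    message(STATUS "${MLPACK_LIBRARIES_LIST}")  # prints -lm;-lpthread
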
@@ -723,8 +547,8 @@ if (PKG_CONFIG_FOUND)
 
   add_custom_target(pkgconfig ALL
       ${CMAKE_COMMAND}
+          -D MLPACK_SOURCE_DIR="${CMAKE_SOURCE_DIR}"
           -P "${CMAKE_CURRENT_SOURCE_DIR}/CMake/GeneratePkgConfig.cmake"
-      DEPENDS mlpack_headers
       COMMENT "Generating mlpack.pc (pkg-config) file.")
 
   install(FILES "${CMAKE_CURRENT_BINARY_DIR}/lib/pkgconfig/mlpack.pc"
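
The -D MLPACK_SOURCE_DIR definition added to the pkgconfig target works
because values passed before -P become ordinary variables inside the script.
A minimal pair (hypothetical file name):

    # show_var.cmake:
    message(STATUS "MLPACK_SOURCE_DIR = ${MLPACK_SOURCE_DIR}")

    # Invoked as:
    #   cmake -D MLPACK_SOURCE_DIR=/path/to/mlpack -P show_var.cmake
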
diff -pruN 3.4.2-7/COPYRIGHT.txt 4.0.1-1/COPYRIGHT.txt
--- 3.4.2-7/COPYRIGHT.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/COPYRIGHT.txt	2022-12-29 15:40:18.000000000 +0000
@@ -7,7 +7,7 @@ Source:
 
 Files: *
 Copyright:
-  Copyright 2008-2018, Ryan Curtin <ryan@ratml.org>
+  Copyright 2008-2022, Ryan Curtin <ryan@ratml.org>
   Copyright 2008-2013, Bill March <march@gatech.edu>
   Copyright 2008-2012, Dongryeol Lee <dongryel@cc.gatech.edu>
   Copyright 2008-2013, Nishant Mehta <niche@cc.gatech.edu>
@@ -22,11 +22,11 @@ Copyright:
   Copyright 2012, Rajendran Mohan <rmohan88@gatech.edu>
   Copyright 2012, Trironk Kiatkungwanglai <trironk@gmail.com>
   Copyright 2012, Patrick Mason <patrick.s.mason@gmail.com>
-  Copyright 2013-2018, Marcus Edel <marcus.edel@fu-berlin.de>
+  Copyright 2013-2020, Marcus Edel <marcus.edel@fu-berlin.de>
   Copyright 2013, Mudit Raj Gupta <mudit.raaj.gupta@gmail.com>
   Copyright 2013-2018, Sumedh Ghaisas <sumedhghaisas@gmail.com>
   Copyright 2014, Michael Fox <michaelfox99@gmail.com>
-  Copyright 2014, Ryan Birmingham <birm@gatech.edu>
+  Copyright 2014,2020, Ryan Birmingham <birm@gatech.edu>
   Copyright 2014, Siddharth Agrawal <siddharth.950@gmail.com>
   Copyright 2014, Saheb Motiani <saheb210692@gmail.com>
   Copyright 2014, Yash Vadalia <yashdv@gmail.com>
@@ -37,7 +37,7 @@ Copyright:
   Copyright 2014, Udit Saxena <saxenda.udit@gmail.com>
   Copyright 2014-2015, Stephen Tu <tu.stephenl@gmail.com>
   Copyright 2014-2015, Jaskaran Singh <jaskaranvirdi@ymail.com>
-  Copyright 2015&2017, Shangtong Zhang <zhangshangtong.cpp@gmail.com>
+  Copyright 2015,2017, Shangtong Zhang <zhangshangtong.cpp@gmail.com>
   Copyright 2015, Hritik Jain <hritik.jain.cse13@itbhu.ac.in>
   Copyright 2015, Vladimir Glazachev <glazachev.vladimir@gmail.com>
   Copyright 2015, QiaoAn Chen <kazenoyumechen@gmail.com>
@@ -55,7 +55,7 @@ Copyright:
   Copyright 2016, Palash Ahuja <abhor902@gmail.com>
   Copyright 2016, Yannis Mentekidis <mentekid@gmail.com>
   Copyright 2016, Ranjan Mondal <ranjan.rev@gmail.com>
-  Copyright 2016-2018, Mikhail Lozhnikov <lozhnikovma@gmail.com>
+  Copyright 2016-2020, Mikhail Lozhnikov <lozhnikovma@gmail.com>
   Copyright 2016, Marcos Pividori <marcos.pividori@gmail.com>
   Copyright 2016, Keon Kim <kwk236@gmail.com>
   Copyright 2016, Nilay Jain <nilayjain13@gmail.com>
@@ -84,14 +84,14 @@ Copyright:
   Copyright 2017, N Rajiv Vaidyanathan <rajivvaidyanathan4@gmail.com>
   Copyright 2017, Kartik Nighania <kartiknighania@gmail.com>
   Copyright 2017-2018, Eugene Freyman <evg.freyman@gmail.com>
-  Copyright 2017-2018, Manish Kumar <manish887kr@gmail.com>
+  Copyright 2017-2019, Manish Kumar <manish887kr@gmail.com>
   Copyright 2017-2018, Haritha Sreedharan Nair <haritha1313@gmail.com>
   Copyright 2017-2018, Sourabh Varshney <sourabhvarshney111@gmail.com>
   Copyright 2018, Projyal Dev <projyal@gmail.com>
   Copyright 2018, Nikhil Goel <nikhilgoel199797@gmail.com>
-  Copyright 2018, Shikhar Jaiswal <jaiswalshikhar87@gmail.com>
+  Copyright 2018-2020, Shikhar Jaiswal <jaiswalshikhar87@gmail.com>
   Copyright 2018, B Kartheek Reddy <bkartheekreddy@gmail.com>
-  Copyright 2018, Atharva Khandait <akhandait45@gmail.com>
+  Copyright 2018-2019, Atharva Khandait <akhandait45@gmail.com>
   Copyright 2018, Wenhao Huang <wenhao.huang.work@gmail.com>
   Copyright 2018-2019, Roberto Hueso <robertohueso96@gmail.com>
   Copyright 2018, Prabhat Sharma <prabhatsharma7298@gmail.com>
@@ -114,9 +114,9 @@ Copyright:
   Copyright 2019, Miguel Canteras <mcanteras@gmail.com>
   Copyright 2019, Bishwa Karki <karkeebishwa1@gmail.com>
   Copyright 2019, Mehul Kumar Nirala <mehulkumarnirala@gmail.com>
-  Copyright 2019, Yashwant Singh Parihar <yashwantsingh.sngh@gmail.com>
+  Copyright 2019-2020, Yashwant Singh Parihar <yashwantsingh.sngh@gmail.com>
   Copyright 2019, Heet Sankesara <heetsankesara3@gmail.com>
-  Copyright 2019, Jeffin Sam <sam.jeffin@gmail.com>
+  Copyright 2019-2020, Jeffin Sam <sam.jeffin@gmail.com>
   Copyright 2019, Vikas S Shetty <shettyvikas209@gmail.com>
   Copyright 2019, Khizir Siddiqui <khizirsiddiqui@gmail.com>
   Copyright 2019, Tejasvi Tomar <tstomar@outlook.com>
@@ -124,7 +124,7 @@ Copyright:
   Copyright 2019, Ziyang Jiang <zij004@alumni.stanford.edu>
   Copyright 2019, Rohit Kartik <rohit.audrey@gmail.com>
   Copyright 2019, Aditya Viki <adityaviki01@gmail.com>
-  Copyright 2019, Kartik Dutt <kartikdutt@live.in>
+  Copyright 2019-2020, Kartik Dutt <kartikdutt@live.in>
   Copyright 2020, Sriram S K <sriramsk1999@gmail.com>
   Copyright 2020, Manoranjan Kumar Bharti ( Nakul Bharti ) <knakul853@gmail.com>
   Copyright 2020, Saraansh Tandon <saraanshtandon1999@gmail.com>
@@ -134,7 +134,22 @@ Copyright:
   Copyright 2020, Benson Muite <benson_muite@emailplus.org>
   Copyright 2020, Sarthak Bhardwaj <7sarthakbhardwaj@gmail.com>
   Copyright 2020, Aakash Kaushik <kaushikaakash7539@gmail.com>
-  Copyright 2020, Anush Kini <anushkini@gmail.com>   
+  Copyright 2020, Anush Kini <anushkini@gmail.com>
+  Copyright 2020, Nippun Sharma <inbox.nippun@gmail.com>
+  Copyright 2020, Rishabh Garg <rishabhgarg108@gmail.com>
+  Copyright 2020, Sudhakar Brar <dxhrmhall1449@tutanota.com>
+  Copyright 2020, Alex Nguyen <alexvn.edu@gmail.com>
+  Copyright 2020, Gaurav Ghati <gauravghatii@gmail.com>
+  Copyright 2020, Anmolpreet Singh <anmol323c@gmail.com>
+  Copyright 2021, Tru Hoang <trugiahoang@gmail.com>
+  Copyright 2021, Mark Fischinger <markfischinger@gmail.com>
+  Copyright 2021, Muhammad Fawwaz Mayda <maydafawwaz@gmail.com>
+  Copyright 2021, Roshan Nrusing Swain <swainroshan001@gmail.com>
+  Copyright 2021, Suvarsha Chennareddy <suvarshachennareddy@gmail.com>
+  Copyright 2021, Shubham Agrawal <shubham.agra1206@gmail.com>
+  Copyright 2022, Sri Madhan M <srimadhan11@gmail.com>
+  Copyright 2022, Zhuojin Liu <zhuojinliu.cs@gmail.com>
+  Copyright 2022, Richèl Bilderbeek <richel@richelbilderbeek.nl>
 
 License: BSD-3-clause
   All rights reserved.
diff -pruN 3.4.2-7/debian/changelog 4.0.1-1/debian/changelog
--- 3.4.2-7/debian/changelog	2022-04-22 12:32:17.000000000 +0000
+++ 4.0.1-1/debian/changelog	2023-01-13 10:25:39.000000000 +0000
@@ -1,3 +1,28 @@
+mlpack (4.0.1-1) unstable; urgency=medium
+
+  * new upstream version
+
+ -- Barak A. Pearlmutter <bap@debian.org>  Fri, 13 Jan 2023 10:25:39 +0000
+
+mlpack (4.0.0-1) unstable; urgency=medium
+
+  * tweak debian/watch to ignore the -windows version
+  * new upstream version
+  * forward port quilt patches (all are obsolete or upstreamed)
+  * disable Go language bindings (future work: generate Julia & Go bindings)
+  * remove build dependency on boost
+  * add build dependency on libcereal-dev
+  * package is now header-only: remove shared library binary package
+  * remove clang build machinery (closes: #1017678)
+  * remove doxygen documentation build
+  * remove code related to no-longer-present files
+  * work around upstream python3 installing into /usr/local
+  * include new docs
+  * bump policy
+  * patch to not disable optimization with -O0 (closes: #970227)
+
+ -- Barak A. Pearlmutter <bap@debian.org>  Tue, 08 Nov 2022 13:13:44 +0000
+
 mlpack (3.4.2-7) unstable; urgency=medium
 
   [ Debian Janitor ]
diff -pruN 3.4.2-7/debian/clean 4.0.1-1/debian/clean
--- 3.4.2-7/debian/clean	2022-02-17 13:54:20.000000000 +0000
+++ 4.0.1-1/debian/clean	2022-11-07 20:22:00.000000000 +0000
@@ -1,2 +1 @@
-src/mlpack/core/util/arma_config.hpp
 src/mlpack/core/util/gitversion.hpp
diff -pruN 3.4.2-7/debian/control 4.0.1-1/debian/control
--- 3.4.2-7/debian/control	2022-04-14 13:11:49.000000000 +0000
+++ 4.0.1-1/debian/control	2022-11-09 15:30:59.000000000 +0000
@@ -4,14 +4,8 @@ Priority: optional
 Maintainer: Debian Science Maintainers <debian-science-maintainers@alioth-lists.debian.net>
 Uploaders: Barak A. Pearlmutter <bap@debian.org>
 Build-Depends: cmake, debhelper-compat (= 13),
-	        clang [!alpha !hppa !ia64 !m68k !sh4 !x32],
-		libomp-dev [!alpha !hppa !ia64 !m68k !sh4 !x32],
 		pkg-config,
-		libboost-math-dev,
-		libboost-program-options-dev,
-		libboost-random-dev,
-		libboost-test-dev,
-		libboost-serialization-dev,
+		libcereal-dev,
 		libxml2-dev,
 		libarmadillo-dev,
 		libensmallen-dev (>= 2.10.0),
@@ -20,8 +14,7 @@ Build-Depends: cmake, debhelper-compat (
 		python3, python3-dev, python3-pandas, python3-numpy, cython3, python3-setuptools,
 		python3-pytest-runner,
 		txt2man,
-		doxygen, doxygen-latex, graphviz, latexmk
-Standards-Version: 4.6.0
+Standards-Version: 4.6.1
 Rules-Requires-Root: no
 Homepage: https://www.mlpack.org/
 Vcs-Git: https://salsa.debian.org/science-team/mlpack.git
@@ -32,9 +25,9 @@ Section: libdevel
 Architecture: any
 Multi-Arch: same
 Pre-Depends: ${misc:Pre-Depends}
-Depends: ${misc:Depends}, ${shlibs:Depends}, ${python3:Depends}, libmlpack3 (= ${binary:Version}),
+Depends: ${misc:Depends}, ${python3:Depends},
 	 libarmadillo-dev, liblapack-dev, libxml2-dev,
-	 libboost-dev, libboost-program-options-dev, libboost-serialization-dev, libboost-test-dev
+	 libcereal-dev
 Suggests: mlpack-doc
 Description: intuitive, fast, scalable C++ machine learning library (development libs)
  This package contains the mlpack Library development files.
@@ -45,26 +38,11 @@ Description: intuitive, fast, scalable C
  methods and function as a "swiss army knife" for machine learning
  researchers.
 
-Package: libmlpack3
-Architecture: any
-Multi-Arch: same
-Pre-Depends: ${misc:Pre-Depends}
-Depends: ${misc:Depends}, ${shlibs:Depends}, ${python3:Depends}
-Description: intuitive, fast, scalable C++ machine learning library (runtime library)
- This package contains the mlpack Library runtime files.
- .
- Machine Learning Pack (mlpack) is an intuitive, fast, scalable C++
- machine learning library, meant to be a machine learning analog to
- LAPACK.  It aims to implement a wide array of machine learning
- methods and function as a "swiss army knife" for machine learning
- researchers.
-
 Package: python3-mlpack
 Architecture: any
 Section: python
-Depends: libmlpack3 (= ${binary:Version}), ${misc:Depends}, ${shlibs:Depends},
+Depends: ${misc:Depends}, ${shlibs:Depends},
 	 ${python3:Depends}
-Provides: ${python3:Provides}
 Description: intuitive, fast, scalable C++ machine learning library (Python bindings)
  This package contains Python bindings for the mlpack Library.
  .
@@ -78,7 +56,7 @@ Package: mlpack-bin
 Section: science
 Architecture: any
 Pre-Depends: ${misc:Pre-Depends}
-Depends: ${misc:Depends}, ${shlibs:Depends}, ${python3:Depends}, libmlpack3 (>= ${binary:Version})
+Depends: ${misc:Depends}, ${shlibs:Depends}, ${python3:Depends}
 Description: intuitive, fast, scalable C++ machine learning library (binaries)
  This package contains example binaries using the mlpack Library.
  .
diff -pruN 3.4.2-7/debian/libmlpack3.install 4.0.1-1/debian/libmlpack3.install
--- 3.4.2-7/debian/libmlpack3.install	2022-02-17 13:54:20.000000000 +0000
+++ 4.0.1-1/debian/libmlpack3.install	1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
-/usr/lib/*/lib*.so.*
diff -pruN 3.4.2-7/debian/libmlpack-dev.install 4.0.1-1/debian/libmlpack-dev.install
--- 3.4.2-7/debian/libmlpack-dev.install	2022-02-17 13:54:20.000000000 +0000
+++ 4.0.1-1/debian/libmlpack-dev.install	2022-11-07 20:22:00.000000000 +0000
@@ -1,7 +1,2 @@
 /usr/include
-/usr/lib/*/lib*.so
-# /usr/lib/*/lib*.a
-# Work around bug of not installing this, per email:
-src/mlpack/core/util/arma_config.hpp /usr/include/mlpack/core/util/
 /usr/lib/*/pkgconfig/mlpack.pc
-/usr/lib/*/cmake/mlpack/*.cmake
diff -pruN 3.4.2-7/debian/mlpack-doc.doc-base.mlpack-manual 4.0.1-1/debian/mlpack-doc.doc-base.mlpack-manual
--- 3.4.2-7/debian/mlpack-doc.doc-base.mlpack-manual	2022-02-17 13:54:20.000000000 +0000
+++ 4.0.1-1/debian/mlpack-doc.doc-base.mlpack-manual	1970-01-01 00:00:00.000000000 +0000
@@ -1,14 +0,0 @@
-Document: mlpack-manual
-Title: Mlpack Reference Manual
-Author: Ryan Curtin, et al
-Abstract:
- Reference Manual for the mlpack C++ machine learning library.
- This documentation is generated by DOxygen.
-Section: Programming/C++
-
-Format: PDF
-Files: /usr/share/doc/libmlpack-dev/refman.pdf
-
-Format: HTML
-Index: /usr/share/doc/mlpack-doc/html/index.html
-Files: /usr/share/doc/mlpack-doc/html/*
diff -pruN 3.4.2-7/debian/mlpack-doc.docs 4.0.1-1/debian/mlpack-doc.docs
--- 3.4.2-7/debian/mlpack-doc.docs	2022-02-17 13:54:20.000000000 +0000
+++ 4.0.1-1/debian/mlpack-doc.docs	2022-11-07 20:22:00.000000000 +0000
@@ -1,7 +1,9 @@
-# reference manual in PDF
-obj-*/doc/latex/refman.pdf
 README.md
-doc/guide
 doc/tutorials
 UPDATING.txt
 CONTRIBUTING.md
+
+doc/developer
+doc/joss_paper
+doc/quickstart
+doc/user
diff -pruN 3.4.2-7/debian/mlpack-doc.install 4.0.1-1/debian/mlpack-doc.install
--- 3.4.2-7/debian/mlpack-doc.install	2022-02-17 13:54:20.000000000 +0000
+++ 4.0.1-1/debian/mlpack-doc.install	1970-01-01 00:00:00.000000000 +0000
@@ -1,22 +0,0 @@
-# reference manual in HTML
-
-# /usr/share/doc/mlpack/html/*.html	/usr/share/doc/mlpack-doc/html/
-# /usr/share/doc/mlpack/html/*.png	/usr/share/doc/mlpack-doc/html/
-# /usr/share/doc/mlpack/html/*.js		/usr/share/doc/mlpack-doc/html/
-# /usr/share/doc/mlpack/html/*.css	/usr/share/doc/mlpack-doc/html/
-# /usr/share/doc/mlpack/html/*.svg	/usr/share/doc/mlpack-doc/html/
-
-# /usr/share/doc/mlpack/html/search/*.html	/usr/share/doc/mlpack/html/search/
-# /usr/share/doc/mlpack/html/search/*.png		/usr/share/doc/mlpack/html/search/
-# /usr/share/doc/mlpack/html/search/*.js		/usr/share/doc/mlpack/html/search/
-# /usr/share/doc/mlpack/html/search/*.css		/usr/share/doc/mlpack/html/search/
-# /usr/share/doc/mlpack/html/search/*.svg		/usr/share/doc/mlpack/html/search/
-
-# Just install it all! Because changes to Doxygen version, or
-# configuration, can add files with a new extension or remove all
-# files with some extension, which makes the above pretty brittle.
-#
-# NOTE: Might need to remove formula.repository if there are formula
-# rendering issues; see Doxygen manual.
-
-/usr/share/doc/mlpack/html	/usr/share/doc/mlpack-doc/
diff -pruN 3.4.2-7/debian/patches/0001-build-Doxygen.patch 4.0.1-1/debian/patches/0001-build-Doxygen.patch
--- 3.4.2-7/debian/patches/0001-build-Doxygen.patch	2022-04-22 12:30:53.000000000 +0000
+++ 4.0.1-1/debian/patches/0001-build-Doxygen.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,25 +0,0 @@
-From: "Barak A. Pearlmutter" <barak+git@cs.nuim.ie>
-Date: Sat, 5 Apr 2014 13:18:23 +0100
-Subject: build Doxygen
-
-This patch causes the build system to generate refman.tex.
-The same effect can be had by adding the command
- $(MAKE) -C obj-* doc
-to the debian/rules override_dh_auto_build target.
----
- CMakeLists.txt | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 42f1fec..0201063 100644
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -640,7 +640,7 @@ if (DOXYGEN_FOUND)
-   )
- 
-   # Generate documentation.
--  add_custom_target(doc
-+  add_custom_target(doc ALL
-       COMMAND "${DOXYGEN_EXECUTABLE}" "${CMAKE_BINARY_DIR}/Doxyfile"
-       DEPENDS "${CMAKE_BINARY_DIR}/Doxyfile"
-       WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
diff -pruN 3.4.2-7/debian/patches/0001-do-not-disable-optimization.patch 4.0.1-1/debian/patches/0001-do-not-disable-optimization.patch
--- 3.4.2-7/debian/patches/0001-do-not-disable-optimization.patch	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/debian/patches/0001-do-not-disable-optimization.patch	2022-11-09 15:29:59.000000000 +0000
@@ -0,0 +1,24 @@
+From: "Barak A. Pearlmutter" <barak+git@pearlmutter.net>
+Date: Tue, 8 Nov 2022 13:12:10 +0000
+Subject: do not disable optimization
+
+Do not disable optimization (-O0) just because debugging is enabled (-g)
+---
+ CMakeLists.txt | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 4a2f1bc..fe3fd59 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -225,8 +225,8 @@ endif()
+ if (DEBUG)
+   if (NOT MSVC)
+     add_definitions(-DDEBUG)
+-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -ftemplate-backtrace-limit=0")
+-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -g -O0")
++    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ftemplate-backtrace-limit=0")
++    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -g")
+   endif()
+ 
+   # mlpack uses it's own mlpack::backtrace class based on Binary File Descriptor
diff -pruN 3.4.2-7/debian/patches/0002-Doxygen-timestamp.patch 4.0.1-1/debian/patches/0002-Doxygen-timestamp.patch
--- 3.4.2-7/debian/patches/0002-Doxygen-timestamp.patch	2022-04-22 12:30:53.000000000 +0000
+++ 4.0.1-1/debian/patches/0002-Doxygen-timestamp.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,27 +0,0 @@
-From: "Barak A. Pearlmutter" <barak+git@pearlmutter.net>
-Date: Tue, 16 Jun 2015 11:15:38 +0100
-Subject: Doxygen timestamp
-
-Remove timestamp from files output by doxygen, to satisfy the czars of
-Debian ReproducibleBuilds.  See
-https://wiki.debian.org/ReproducibleBuilds/TimestampsInDocumentationGeneratedByDoxygen
----
- Doxyfile | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/Doxyfile b/Doxyfile
-index 465aa90..20aaf1e 100644
---- a/Doxyfile
-+++ b/Doxyfile
-@@ -71,6 +71,11 @@ SHOW_USED_FILES        = YES
- SHOW_DIRECTORIES       = YES
- FILE_VERSION_FILTER    =
- #---------------------------------------------------------------------------
-+# Debian ReproducibleBuilds configuration option, see
-+# https://wiki.debian.org/ReproducibleBuilds/TimestampsInDocumentationGeneratedByDoxygen
-+#---------------------------------------------------------------------------
-+HTML_TIMESTAMP         = NO
-+#---------------------------------------------------------------------------
- # configuration options related to warning and progress messages
- #---------------------------------------------------------------------------
- QUIET                  = NO
diff -pruN 3.4.2-7/debian/patches/0003-doxygen-warning-defang.patch 4.0.1-1/debian/patches/0003-doxygen-warning-defang.patch
--- 3.4.2-7/debian/patches/0003-doxygen-warning-defang.patch	2022-04-22 12:30:53.000000000 +0000
+++ 4.0.1-1/debian/patches/0003-doxygen-warning-defang.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,25 +0,0 @@
-From: "Barak A. Pearlmutter" <barak+git@pearlmutter.net>
-Date: Tue, 3 Nov 2020 22:12:04 +0000
-Subject: doxygen warning defang
-
-Do not allow doxygen warnings to derail the build.
-This was suggested by Ryan Curtin as a fix for FTBFS on some architectures.
-But be that as it may, we really shouldn't allow documentation build
-warnings to break the build.
----
- Doxyfile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/Doxyfile b/Doxyfile
-index 20aaf1e..30dfcf1 100644
---- a/Doxyfile
-+++ b/Doxyfile
-@@ -80,7 +80,7 @@ HTML_TIMESTAMP         = NO
- #---------------------------------------------------------------------------
- QUIET                  = NO
- WARNINGS               = YES
--WARN_AS_ERROR          = YES
-+WARN_AS_ERROR          = NO
- WARN_IF_UNDOCUMENTED   = YES
- WARN_IF_DOC_ERROR      = YES
- WARN_NO_PARAMDOC       = YES
diff -pruN 3.4.2-7/debian/patches/0004-Check-if-atomics-need-latomic-linking.patch 4.0.1-1/debian/patches/0004-Check-if-atomics-need-latomic-linking.patch
--- 3.4.2-7/debian/patches/0004-Check-if-atomics-need-latomic-linking.patch	2022-04-22 12:30:53.000000000 +0000
+++ 4.0.1-1/debian/patches/0004-Check-if-atomics-need-latomic-linking.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,148 +0,0 @@
-From: Marcus Edel <marcus.edel@fu-berlin.de>
-Date: Wed, 16 Feb 2022 21:42:44 -0500
-Subject: Check if atomics need -latomic linking.
-
----
- CMake/CheckAtomic.cmake | 106 ++++++++++++++++++++++++++++++++++++++++++++++++
- CMakeLists.txt          |   8 ++++
- 2 files changed, 114 insertions(+)
- create mode 100644 CMake/CheckAtomic.cmake
-
-diff --git a/CMake/CheckAtomic.cmake b/CMake/CheckAtomic.cmake
-new file mode 100644
-index 0000000..29f3bdd
---- /dev/null
-+++ b/CMake/CheckAtomic.cmake
-@@ -0,0 +1,106 @@
-+# atomic builtins are required for threading support.
-+
-+INCLUDE(CheckCXXSourceCompiles)
-+INCLUDE(CheckLibraryExists)
-+
-+# Sometimes linking against libatomic is required for atomic ops, if
-+# the platform doesn't support lock-free atomics.
-+
-+function(check_working_cxx_atomics varname)
-+  set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
-+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11")
-+  CHECK_CXX_SOURCE_COMPILES("
-+#include <atomic>
-+std::atomic<int> x;
-+int main() {
-+  return x;
-+}
-+" ${varname})
-+  set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
-+endfunction(check_working_cxx_atomics)
-+
-+function(check_working_cxx_atomics64 varname)
-+  set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
-+  set(CMAKE_REQUIRED_FLAGS "-std=c++11 ${CMAKE_REQUIRED_FLAGS}")
-+  CHECK_CXX_SOURCE_COMPILES("
-+#include <atomic>
-+#include <cstdint>
-+std::atomic<uint64_t> x (0);
-+int main() {
-+  uint64_t i = x.load(std::memory_order_relaxed);
-+  return 0;
-+}
-+" ${varname})
-+  set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
-+endfunction(check_working_cxx_atomics64)
-+
-+
-+# This isn't necessary on MSVC, so avoid command-line switch annoyance
-+# by only running on GCC-like hosts.
-+if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
-+  # First check if atomics work without the library.
-+  check_working_cxx_atomics(HAVE_CXX_ATOMICS_WITHOUT_LIB)
-+  # If not, check if the library exists, and atomics work with it.
-+  if(NOT HAVE_CXX_ATOMICS_WITHOUT_LIB)
-+    check_library_exists(atomic __atomic_fetch_add_4 "" HAVE_LIBATOMIC)
-+    if( HAVE_LIBATOMIC )
-+      list(APPEND CMAKE_REQUIRED_LIBRARIES "atomic")
-+      check_working_cxx_atomics(HAVE_CXX_ATOMICS_WITH_LIB)
-+      if (NOT HAVE_CXX_ATOMICS_WITH_LIB)
-+	message(FATAL_ERROR "Host compiler must support std::atomic!")
-+      endif()
-+    else()
-+      message(FATAL_ERROR "Host compiler appears to require libatomic, but cannot find it.")
-+    endif()
-+  endif()
-+endif()
-+
-+# Check for 64 bit atomic operations.
-+if(MSVC)
-+  set(HAVE_CXX_ATOMICS64_WITHOUT_LIB True)
-+else()
-+  check_working_cxx_atomics64(HAVE_CXX_ATOMICS64_WITHOUT_LIB)
-+endif()
-+
-+# If not, check if the library exists, and atomics work with it.
-+if(NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB)
-+  check_library_exists(atomic __atomic_load_8 "" HAVE_CXX_LIBATOMICS64)
-+  if(HAVE_CXX_LIBATOMICS64)
-+    list(APPEND CMAKE_REQUIRED_LIBRARIES "atomic")
-+    check_working_cxx_atomics64(HAVE_CXX_ATOMICS64_WITH_LIB)
-+    if (NOT HAVE_CXX_ATOMICS64_WITH_LIB)
-+      message(FATAL_ERROR "Host compiler must support 64-bit std::atomic!")
-+    endif()
-+  else()
-+    message(FATAL_ERROR "Host compiler appears to require libatomic for 64-bit operations, but cannot find it.")
-+  endif()
-+endif()
-+
-+## TODO: This define is only used for the legacy atomic operations in
-+## llvm's Atomic.h, which should be replaced.  Other code simply
-+## assumes C++11 <atomic> works.
-+CHECK_CXX_SOURCE_COMPILES("
-+#ifdef _MSC_VER
-+#include <windows.h>
-+#endif
-+int main() {
-+#ifdef _MSC_VER
-+        volatile LONG val = 1;
-+        MemoryBarrier();
-+        InterlockedCompareExchange(&val, 0, 1);
-+        InterlockedIncrement(&val);
-+        InterlockedDecrement(&val);
-+#else
-+        volatile unsigned long val = 1;
-+        __sync_synchronize();
-+        __sync_val_compare_and_swap(&val, 1, 0);
-+        __sync_add_and_fetch(&val, 1);
-+        __sync_sub_and_fetch(&val, 1);
-+#endif
-+        return 0;
-+      }
-+" LLVM_HAS_ATOMICS)
-+
-+if( NOT LLVM_HAS_ATOMICS )
-+  message(STATUS "Warning: LLVM will be built thread-unsafe because atomic builtins are missing")
-+endif()
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 0201063..d681074 100644
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -3,6 +3,7 @@ project(mlpack C CXX)
- 
- include(CMake/cotire.cmake)
- include(CMake/CheckHash.cmake)
-+include(CMake/CheckAtomic.cmake)
- 
- # First, define all the compilation options.
- # We default to debugging mode for developers.
-@@ -138,6 +139,13 @@ endif()
- # specific.  This list is a subset of MLPACK_LIBRARIES.
- set(COMPILER_SUPPORT_LIBRARIES "")
- 
-+# Check if atomics need -latomic linking.
-+include(CheckAtomic)
-+if (LLVM_HAS_ATOMICS)
-+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -latomic")
-+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -latomic")
-+endif ()
-+
- # If we are using MSVC, we need /bigobj.
- if (MSVC)
-   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
diff -pruN 3.4.2-7/debian/patches/series 4.0.1-1/debian/patches/series
--- 3.4.2-7/debian/patches/series	2022-04-22 12:30:53.000000000 +0000
+++ 4.0.1-1/debian/patches/series	2022-11-09 15:29:59.000000000 +0000
@@ -1,4 +1 @@
-0001-build-Doxygen.patch
-0002-Doxygen-timestamp.patch
-0003-doxygen-warning-defang.patch
-0004-Check-if-atomics-need-latomic-linking.patch
+0001-do-not-disable-optimization.patch
diff -pruN 3.4.2-7/debian/python3-mlpack.install 4.0.1-1/debian/python3-mlpack.install
--- 3.4.2-7/debian/python3-mlpack.install	2022-02-17 13:54:20.000000000 +0000
+++ 4.0.1-1/debian/python3-mlpack.install	2022-11-07 20:22:00.000000000 +0000
@@ -1 +1,2 @@
-usr/lib/python3*/*-packages/mlpack*    /usr/lib/python3/dist-packages/
+# the "local/" here is new as of 4.0.
+usr/local/lib/python3*/*-packages/mlpack*    /usr/lib/python3/dist-packages/
diff -pruN 3.4.2-7/debian/rules 4.0.1-1/debian/rules
--- 3.4.2-7/debian/rules	2022-02-17 13:54:20.000000000 +0000
+++ 4.0.1-1/debian/rules	2022-11-08 10:12:48.000000000 +0000
@@ -15,14 +15,18 @@ export LIBS="pthread"
 ## reduce abrupt memory spikes, and g++ -g0 to skip debugging info.
 ## (c) switch to clang.
 
-# Force Compiler to CLANG if installed. Otherwise use GCC but try to
-# conserve memory. The build dependencies try to install CLANG, but
-# give a dummy alternative so the build can proceed even if it is
-# unavailable.
-ifeq ($(shell type clang > /dev/null && echo found),found)
-export CC=clang
-export CXX=clang++
-else
+## Force Compiler to CLANG if installed. Otherwise use GCC but try to
+## conserve memory. The build dependencies try to install CLANG, but
+## give a dummy alternative so the build can proceed even if it is
+## unavailable.
+## Note: this requires build dependencies of the form
+##     clang [!alpha !hppa !ia64 !m68k !sh4 !x32],
+##     libomp-dev [!alpha !hppa !ia64 !m68k !sh4 !x32],
+
+# ifeq ($(shell type clang > /dev/null && echo found),found)
+# export CC=clang
+# export CXX=clang++
+# else
 
 ## These issues have occurred post-2.1.x in two places: Ubuntu
 ## Launchpad, and some particular Debian architectures including
@@ -54,9 +58,10 @@ else
 	export DEB_CXXFLAGS_MAINT_APPEND = --param ggc-min-expand=20 -g0
 endif
 endif
-endif
+# endif
 
 DO_IT_TO_JULIA=OFF
+TAKE_IT_TO_GO=OFF
 
 %:
 	dh $@ --without python2 --with python3 --buildsystem=cmake ${DH_FLAGS}
@@ -70,17 +75,13 @@ override_dh_auto_configure:
 		-DBUILD_PYTHON_BINDINGS=ON \
 		-DPYTHON_EXECUTABLE=/usr/bin/python3 \
 		-DBUILD_JULIA_BINDINGS=$(DO_IT_TO_JULIA) \
+		-DBUILD_GO_BINDINGS=$(TAKE_IT_TO_GO) \
 		-DBUILD_TESTS=OFF \
 		-DBUILD_SHARED_LIBS=ON \
 		-DDISABLE_DOWNLOADS=ON \
 		-DSTB_IMAGE_INCLUDE_DIR=/usr/include/stb \
 		-DUSE_OPENMP=ON
 
-execute_after_dh_auto_build:
-	@echo "building PDF reference manual"
-	cd obj-*/doc/latex && \
-	 latexmk --pdf refman.tex
-
 override_dh_auto_test:
 	@echo "do not ask do not tell do not test"
 
@@ -112,6 +113,3 @@ execute_after_dh_installexamples:
 
 override_dh_installchangelogs:
 	dh_installchangelogs HISTORY.md
-
-override_dh_compress:
-	dh_compress -Xrefman.pdf -Xdoc/html/
diff -pruN 3.4.2-7/debian/shlibs.local 4.0.1-1/debian/shlibs.local
--- 3.4.2-7/debian/shlibs.local	2022-02-17 13:54:20.000000000 +0000
+++ 4.0.1-1/debian/shlibs.local	1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
-libmlpack 3 libmlpack3
diff -pruN 3.4.2-7/debian/watch 4.0.1-1/debian/watch
--- 3.4.2-7/debian/watch	2022-02-17 13:54:20.000000000 +0000
+++ 4.0.1-1/debian/watch	2022-10-26 19:51:06.000000000 +0000
@@ -1,2 +1,3 @@
 version=4
-https://www.mlpack.org/files/@PACKAGE@-@ANY_VERSION@@ARCHIVE_EXT@
+#https://www.mlpack.org/files/@PACKAGE@-@ANY_VERSION@@ARCHIVE_EXT@
+https://www.mlpack.org/files/@PACKAGE@-([0-9.]+)@ARCHIVE_EXT@
diff -pruN 3.4.2-7/dist/win-installer/mlpack-win-installer/mlpack-win-installer.wixproj 4.0.1-1/dist/win-installer/mlpack-win-installer/mlpack-win-installer.wixproj
--- 3.4.2-7/dist/win-installer/mlpack-win-installer/mlpack-win-installer.wixproj	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/dist/win-installer/mlpack-win-installer/mlpack-win-installer.wixproj	2022-12-29 15:40:18.000000000 +0000
@@ -9,33 +9,36 @@
     <OutputName>mlpack-windows</OutputName>
     <OutputType>Package</OutputType>
     <Name>mlpack-win-installer</Name>
+    <DefineSolutionProperties>false</DefineSolutionProperties>
+    <DefineConstants>SourceDir=.\Sources</DefineConstants>
+    <WixTargetsPath Condition=" '$(WixTargetsPath)' == '' ">$(MSBuildExtensionsPath)\Microsoft\WiX\v3.x\Wix.targets</WixTargetsPath>
   </PropertyGroup>
   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
     <OutputPath>bin\$(Configuration)\</OutputPath>
     <IntermediateOutputPath>obj\$(Configuration)\</IntermediateOutputPath>
-    <DefineConstants>Debug</DefineConstants>
+    <DefineConstants>Debug;$(DefineConstants)</DefineConstants>
   </PropertyGroup>
   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
     <OutputPath>bin\$(Configuration)\</OutputPath>
     <IntermediateOutputPath>obj\$(Configuration)\</IntermediateOutputPath>
   </PropertyGroup>
   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
-    <DefineConstants>Debug</DefineConstants>
     <OutputPath>bin\$(Platform)\$(Configuration)\</OutputPath>
     <IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
+    <DefineConstants>Debug;$(DefineConstants)</DefineConstants>
   </PropertyGroup>
   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
     <OutputPath>bin\$(Platform)\$(Configuration)\</OutputPath>
     <IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
   </PropertyGroup>
-  <PropertyGroup>
-    <DefineConstants>HarvestPath=..\staging</DefineConstants>
-  </PropertyGroup>
   <ItemGroup>
     <Compile Include="Product.wxs" />
-    <Compile Include="HeatGeneratedFileList.wxs" />
-  </ItemGroup>
-  <ItemGroup>
+    <HarvestDirectory Include=".\Sources">
+      <DirectoryRefId>Sources</DirectoryRefId>
+      <ComponentGroupName>Sources</ComponentGroupName>
+      <PreprocessorVariable>var.SourceDir</PreprocessorVariable>
+      <SuppressRegistry>true</SuppressRegistry>
+    </HarvestDirectory>
     <WixExtension Include="WixUIExtension">
       <HintPath>$(WixExtDir)\WixUIExtension.dll</HintPath>
       <Name>WixUIExtension</Name>
@@ -46,14 +49,4 @@
   <Target Name="EnsureWixToolsetInstalled" Condition=" '$(WixTargetsImported)' != 'true' ">
     <Error Text="The WiX Toolset v3.11 (or newer) build tools must be installed to build this project. To download the WiX Toolset, see http://wixtoolset.org/releases/" />
   </Target>
-  <!--
-	To modify your build process, add your task inside one of the targets below and uncomment it.
-	Other similar extension points exist, see Wix.targets.-->
-  <Target Name="BeforeBuild">
-    <HeatDirectory Directory="..\staging" PreprocessorVariable="var.HarvestPath" OutputFile="HeatGeneratedFileList.wxs" ComponentGroupName="HeatGenerated" DirectoryRefId="INSTALLFOLDER" AutogenerateGuids="true" ToolPath="$(WixToolPath)" SuppressFragments="true" SuppressRegistry="true" SuppressRootDirectory="true" />
-  </Target>
-  <!--
-	<Target Name="AfterBuild">
-	</Target>
-	-->
-</Project>
\ No newline at end of file
+</Project>
diff -pruN 3.4.2-7/dist/win-installer/mlpack-win-installer/Product.wxs 4.0.1-1/dist/win-installer/mlpack-win-installer/Product.wxs
--- 3.4.2-7/dist/win-installer/mlpack-win-installer/Product.wxs	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/dist/win-installer/mlpack-win-installer/Product.wxs	2022-12-29 15:40:18.000000000 +0000
@@ -2,47 +2,38 @@
 <Wix xmlns="http://schemas.microsoft.com/wix/2006/wi">
   <!-- 1) DO NOT CHANGE the product GUID! It is forever -->
   <!-- 2) %MLPACK_VERSION env var is set by .appveyor.yml -->
-	<Product Id="02A00C77-197D-4E91-B7D9-5836220E92E9"
-           UpgradeCode="6C2D7EC0-6F10-40CB-9703-1DC160A62662"
-           Name="mlpack"
-           Language="1033"
-           Version="$(env.MLPACK_VERSION)" 
-           Manufacturer="mlpack">
-		
-        <Package InstallerVersion="200"
-             Description="mlpack Windows Installer"
-             Compressed="yes" 
-             InstallScope="perMachine"
-             Platform="x64"/>
+  <Product Id="02A00C77-197D-4E91-B7D9-5836220E92E9"
+       UpgradeCode="6C2D7EC0-6F10-40CB-9703-1DC160A62662"
+       Name="mlpack"
+       Language="1033"
+       Version="$(env.MLPACK_VERSION)"
+       Manufacturer="mlpack">
+    <Package InstallerVersion="200"
+         Description="mlpack Windows Installer"
+         Compressed="yes"
+         InstallScope="perMachine"
+         Platform="x64"/>
 
-        <MajorUpgrade DowngradeErrorMessage="A newer version of [ProductName] is already installed." />
-		
-        <MediaTemplate EmbedCab="yes"/>
+    <MajorUpgrade DowngradeErrorMessage="A newer version of [ProductName] is already installed." />
+    <MediaTemplate EmbedCab="yes"/>
 
-		<Feature Id="ProductFeature" Title="mlpackWindows" Level="1">
-			<ComponentGroupRef Id="ProductComponents" />
-		</Feature>
-        <Property Id="MLPACK_VERSION">$(env.MLPACK_VERSION)</Property>
-        <Property Id="WIXUI_INSTALLDIR" Value="INSTALLFOLDER" />
-        <WixVariable Id="WixUILicenseRtf" Value="..\staging\license.rtf"/>
-        <WixVariable Id="WixUIBannerBmp" Value="..\res\banner.jpg"/>
-        <WixVariable Id="WixUIDialogBmp" Value="..\res\dialog_white.jpg"/>
-        <UIRef Id="WixUI_InstallDir" />
-	</Product>
+    <Directory Id="TARGETDIR" Name="SourceDir">
+      <Directory Id="ProgramFilesFolder" Name="PFiles">
+        <Directory Id="INSTALLDIR" Name="mlpack">
+          <Directory Id="Sources" />
+        </Directory>
+      </Directory>
+    </Directory>
 
-	<Fragment>
-		<Directory Id="TARGETDIR" Name="SourceDir">
-			<Directory Id="ProgramFiles64Folder">
-				<Directory Id="INSTALLFOLDER" Name="mlpack" />
-			</Directory>
-		</Directory>
-	</Fragment>
-
-	<Fragment>
-		<ComponentGroup Id="ProductComponents" Directory="INSTALLFOLDER">
-           <!-- This references the list of mlpack files automatically generated using Heat (see .wixproj BeforeBuild Target) -->
-           <ComponentGroupRef Id="HeatGenerated"/>
-		</ComponentGroup>
-	</Fragment>
+    <Feature Id="ProductFeature" Title="mlpackWindows" ConfigurableDirectory="INSTALLDIR" Level="1">
+      <ComponentGroupRef Id="Sources" />
+    </Feature>
 
+    <Property Id="MLPACK_VERSION">$(env.MLPACK_VERSION)</Property>
+    <Property Id="WIXUI_INSTALLDIR" Value="INSTALLDIR" />
+    <WixVariable Id="WixUILicenseRtf" Value="..\staging\license.rtf"/>
+    <WixVariable Id="WixUIBannerBmp" Value="..\res\banner.jpg"/>
+    <WixVariable Id="WixUIDialogBmp" Value="..\res\dialog_white.jpg"/>
+    <UIRef Id="WixUI_InstallDir" />
+  </Product>
 </Wix>
diff -pruN 3.4.2-7/doc/developer/bindings.md 4.0.1-1/doc/developer/bindings.md
--- 3.4.2-7/doc/developer/bindings.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/developer/bindings.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,1332 @@
+# mlpack automatic bindings to other languages
+
+mlpack has a system to automatically generate bindings to other languages, such
+as Python and command-line programs, and it can be extended to other languages
+with relative ease.  The maintenance burden of this system is low, and it is
+designed so that the bindings produced are always up to date across languages
+and with the mlpack library itself.
+
+This document describes the full functioning of the system, and is a good place
+to start for someone who wishes to understand the system so that they can
+contribute a new binding language, adapt the system for use in their own
+project, or simply satisfy their curiosity about how the sausage is made.
+
+The document is split into several sections:
+
+ - [Introduction](#introduction)
+ - [Writing code that can be turned into a binding](#writing-code-that-can-be-turned-into-a-binding)
+ - [How to write mlpack bindings](#how-to-write-mlpack-bindings)
+ - [Structure of IO module and associated macros](#structure-of-io-module-and-associated-macros)
+ - [Command-line program bindings](#command-line-program-bindings)
+ - [Python bindings](#python-bindings)
+ - [Adding new binding types](#adding-new-binding-types)
+
+## Introduction
+
+C++ is not the most popular language on the planet, and it (unfortunately) can
+scare many away with its ultra-verbose error messages, confusing template rules,
+and complex metaprogramming techniques.  Most practitioners of machine learning
+tend to avoid writing native C++ and instead prefer other languages---probably
+most notably Python.
+
+In the case of Python, many projects will use tools like
+[SWIG](http://www.swig.org) to automatically generate bindings, or they might
+hand-write Cython.  The same types of strategies may be used for other
+languages; hand-written MEX files may be used for MATLAB, hand-written Rcpp
+bindings might be used for R bindings, and so forth.
+
+However, these approaches have a fundamental flaw: the hand-written bindings
+must be maintained, and risk going out of date as the rest of the library
+changes or new functionality is added.  This incurs a maintenance burden: each
+major change to the library means that someone must update the bindings and test
+that they are still working.  mlpack is not prepared to handle this maintenance
+workload; therefore an alternate solution is needed.
+
+At the time of the design of this system, mlpack shipped headers for a C++
+library as well as many (~40) hand-written command-line programs that used the
+`mlpack::IO` object to manage command-line arguments.  These programs all had
+similar structure, and could be logically split into three sections:
+
+ - parse the input options supplied by the user
+ - run the machine learning algorithm
+ - prepare the output to return to the user
+
+The user might interface with this command-line program like the following:
+
+```sh
+$ mlpack_knn -r reference.csv -q query.csv -k 3 -d d.csv -n n.csv
+```
+
+That is, they would pass a number of input options---some were numeric values
+(like `-k 3`), some were filenames (like `-r reference.csv`), and there were a
+few other types as well.  Therefore, the first stage of the program---parsing
+input options---would be handled by reading the command line and loading any
+input matrices.  Preparing the output, which usually consists of data matrices
+(e.g. `-d d.csv`), involves saving the matrix returned by the algorithm to the
+user's desired file.
+
+Ideally, any binding to any language would have this same structure, and the
+actual "run the machine learning algorithm" code could be identical.  For
+MATLAB, for instance, we would not need to read the file `reference.csv` but
+instead the user would simply pass their data matrix as an argument.  So each
+input and output parameter would need to be handled differently, but the
+algorithm could be run identically across all bindings.
+
+Therefore, design of an automatically-generated binding system would simply
+involve generating the boilerplate code necessary to parse input options for a
+given language, and to return output options to a user.
+
+## Writing code that can be turned into a binding
+
+This section details what a binding file might actually look like.  It is good
+to have this API in mind when reading the following sections.
+
+Each mlpack binding is typically contained in the `src/mlpack/methods/` folder
+corresponding to a given machine learning algorithm, with the suffix
+`_main.cpp`; so an example is `src/mlpack/methods/pca/pca_main.cpp`.
+
+These files have roughly two parts:
+
+ - definition of the input and output parameters with `PARAM` macros and
+   documentation with `BINDING` macros
+ - implementation of `BINDING_FUNCTION()`, which is the actual machine learning
+   code
+
+Here is a simple example file:
+
+```c++
+// This is a stripped version of mean_shift_main.cpp.
+#include <mlpack/core.hpp>
+
+// Define the name of the binding (as seen by the binding generation system).
+#undef BINDING_NAME
+#define BINDING_NAME mean_shift
+
+#include <mlpack/core/util/mlpack_main.hpp>
+
+#include <mlpack/core/kernels/gaussian_kernel.hpp>
+#include "mean_shift.hpp"
+
+using namespace mlpack;
+using namespace mlpack::meanshift;
+using namespace mlpack::kernel;
+using namespace std;
+
+// Define the help text for the program.  The PRINT_PARAM_STRING() and
+// PRINT_DATASET() macros are used to print the name of the parameter as seen in
+// the binding type that is being used, and the PRINT_CALL() macro generates a
+// sample invocation of the program in the language of the binding type that is
+// being used.  Note that the macros must have + on either side of them.  We
+// provide some extra references with the "SEE_ALSO()" macro, which is used to
+// generate documentation for the website.
+
+// Program Name.
+BINDING_USER_NAME("Mean Shift Clustering");
+
+// Short description.
+BINDING_SHORT_DESC(
+    "A fast implementation of mean-shift clustering using dual-tree range "
+    "search.  Given a dataset, this uses the mean shift algorithm to produce "
+    "and return a clustering of the data.");
+
+// Long description.
+BINDING_LONG_DESC(
+    "This program performs mean shift clustering on the given dataset, storing "
+    "the learned cluster assignments either as a column of labels in the input "
+    "dataset or separately."
+    "\n\n"
+    "The input dataset should be specified with the " +
+    PRINT_PARAM_STRING("input") + " parameter, and the radius used for search"
+    " can be specified with the " + PRINT_PARAM_STRING("radius") + " "
+    "parameter.  The maximum number of iterations before algorithm termination "
+    "is controlled with the " + PRINT_PARAM_STRING("max_iterations") + " "
+    "parameter."
+    "\n\n"
+    "The output labels may be saved with the " + PRINT_PARAM_STRING("output") +
+    " output parameter and the centroids of each cluster may be saved with the"
+    " " + PRINT_PARAM_STRING("centroid") + " output parameter.");
+
+// Example.
+BINDING_EXAMPLE(
+    "For example, to run mean shift clustering on the dataset " +
+    PRINT_DATASET("data") + " and store the centroids to " +
+    PRINT_DATASET("centroids") + ", the following command may be used: "
+    "\n\n" +
+    PRINT_CALL("mean_shift", "input", "data", "centroid", "centroids"));
+
+// See also...
+BINDING_SEE_ALSO("@kmeans", "#kmeans");
+BINDING_SEE_ALSO("@dbscan", "#dbscan");
+BINDING_SEE_ALSO("Mean shift on Wikipedia",
+        "https://en.wikipedia.org/wiki/Mean_shift");
+BINDING_SEE_ALSO("Mean Shift, Mode Seeking, and Clustering (pdf)",
+        "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.510.1222"
+        "&rep=rep1&type=pdf");
+BINDING_SEE_ALSO("mlpack::mean_shift::MeanShift C++ class documentation",
+        "@src/mlpack/methods/mean_shift/mean_shift.hpp");
+
+// Define parameters for the executable.
+
+// Required option: the user must give us a matrix.
+PARAM_MATRIX_IN_REQ("input", "Input dataset to perform clustering on.", "i");
+
+// Output options: the user can save the output matrix of labels and/or the
+// centroids.
+PARAM_UCOL_OUT("output", "Matrix to write output labels to.", "o");
+PARAM_MATRIX_OUT("centroid", "If specified, the centroids of each cluster will "
+    "be written to the given matrix.", "C");
+
+// Mean shift configuration options.
+PARAM_INT_IN("max_iterations", "Maximum number of iterations before mean shift "
+    "terminates.", "m", 1000);
+PARAM_DOUBLE_IN("radius", "If the distance between two centroids is less than "
+    "the given radius, one will be removed.  A radius of 0 or less means an "
+    "estimate will be calculated and used for the radius.", "r", 0);
+
+void BINDING_FUNCTION(util::Params& params, util::Timers& timers)
+{
+  // Process the parameters that the user passed.
+  const double radius = params.Get<double>("radius");
+  const int maxIterations = params.Get<int>("max_iterations");
+
+  if (maxIterations < 0)
+  {
+    Log::Fatal << "Invalid value for maximum iterations (" << maxIterations <<
+        ")! Must be greater than or equal to 0." << endl;
+  }
+
+  // Warn, if the user did not specify that they wanted any output.
+  if (!params.Has("output") && !params.Has("centroid"))
+  {
+    Log::Warn << "--output_file, --in_place, and --centroid_file are not set; "
+        << "no results will be saved." << endl;
+  }
+
+  arma::mat dataset = std::move(params.Get<arma::mat>("input"));
+  arma::mat centroids;
+  arma::Col<size_t> assignments;
+
+  // Prepare and run the actual algorithm.
+  MeanShift<> meanShift(radius, maxIterations);
+
+  timers.Start("clustering");
+  Log::Info << "Performing mean shift clustering..." << endl;
+  meanShift.Cluster(dataset, assignments, centroids);
+  timers.Stop("clustering");
+
+  Log::Info << "Found " << centroids.n_cols << " centroids." << endl;
+  if (radius <= 0.0)
+    Log::Info << "Estimated radius was " << meanShift.Radius() << ".\n";
+
+  // Should we give the user the output matrix?
+  if (params.Has("output"))
+    params.Get<arma::Col<size_t>>("output") = std::move(assignments);
+
+  // Should we give the user the centroid matrix?
+  if (params.Has("centroid"))
+    params.Get<arma::mat>("centroid") = std::move(centroids);
+}
+```
+
+We can see that we have defined the name of the binding with the `BINDING_NAME`
+macro, and basic program information in the `BINDING_USER_NAME()`,
+`BINDING_SHORT_DESC()`, `BINDING_LONG_DESC()`, `BINDING_EXAMPLE()` and
+`BINDING_SEE_ALSO()` macros.  This is, for instance, what is displayed to
+describe the binding if the user passed the `--help` option for a command-line
+program.
+
+Then, we define five parameters, three input and two output, that define the
+data and options that the mean shift clustering will function on.  These
+parameters are defined with the `PARAM` macros, of which there are many.  The
+names of these macros specify the type, whether the parameter is required, and
+whether the parameter is input or output.  Some examples:
+
+ - `PARAM_STRING_IN()` -- a string-type input parameter
+ - `PARAM_MATRIX_OUT()` -- a matrix-type output parameter
+ - `PARAM_DOUBLE_IN_REQ()` -- a required double-type input parameter
+ - `PARAM_UMATRIX_IN()` -- an unsigned matrix-type input parameter
+ - `PARAM_MODEL_IN()` -- a serializable model-type input parameter
+
+Note that each of these macros may have slightly different syntax.  See the
+links above for further documentation.
+
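+For instance, the parameter definitions from the mean shift example above have
+different argument lists depending on the macro (this just restates macros
+already shown, with the role of each argument annotated):
+
+```c++
+// Required input matrix: name, description, single-character alias.
+PARAM_MATRIX_IN_REQ("input", "Input dataset to perform clustering on.", "i");
+
+// Optional input integer: name, description, alias, and a default value.
+PARAM_INT_IN("max_iterations", "Maximum number of iterations before mean shift "
+    "terminates.", "m", 1000);
+
+// Output unsigned column vector: name, description, alias; no default needed.
+PARAM_UCOL_OUT("output", "Matrix to write output labels to.", "o");
+```
+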
+In order to write a new binding, then, you simply must define `BINDING_NAME`,
+then write `BINDING_USER_NAME()`, `BINDING_SHORT_DESC()`, `BINDING_LONG_DESC()`,
+`BINDING_EXAMPLE()` and `BINDING_SEE_ALSO()` definitions of the program with
+some documentation, define the input and output parameters as `PARAM` macros, and
+then write a `BINDING_FUNCTION()` function that actually performs the
+functionality of the binding.
+
+Inside of `BINDING_FUNCTION(util::Params& params, util::Timers& timers)` (see the sketch after this list):
+
+ - All input parameters are accessible through `params.Get<type>("name")`.
+ - All output parameters should be set by the end of the function with the
+      `params.Get<type>("name")` method.
+ - The `params.Has("name")` function will return `true` if the parameter
+      `"name"` was specified.
+ - Timers can be started and stopped with `timers.Start("timer_name")` and
+      `timers.Stop("timer_name")`.
+
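+Putting those pieces together, a minimal sketch of a `BINDING_FUNCTION()` (for
+a hypothetical binding with just one input matrix parameter `"input"` and one
+output matrix parameter `"output"`; the timer name is also made up) might look
+like:
+
+```c++
+void BINDING_FUNCTION(util::Params& params, util::Timers& timers)
+{
+  // Access the input matrix that the user passed.
+  arma::mat dataset = std::move(params.Get<arma::mat>("input"));
+
+  timers.Start("computation");
+  // ... run the actual machine learning algorithm on `dataset` here ...
+  timers.Stop("computation");
+
+  // Only set the output parameter if the user asked for it.
+  if (params.Has("output"))
+    params.Get<arma::mat>("output") = std::move(dataset);
+}
+```
+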
+Then, assuming that your program is saved in the file `program_name_main.cpp`,
+generating bindings for other languages is a simple addition to the
+`CMakeLists.txt` file in `src/mlpack/methods/CMakeLists.txt`:
+
+```
+add_all_bindings(program_dir program_name "category")
+```
+
+In this example, this will also add a Markdown binding, which will generate
+documentation that is typically used to build the website.  The `category`
+parameter should be one of the categories in
+`src/mlpack/bindings/markdown/MarkdownCategories.cmake`.
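+
+For the mean shift example above, and assuming that the binding lives in a
+directory named `mean_shift` and that `"clustering"` is one of the categories
+listed in that file, the call might look like:
+
+```
+add_all_bindings(mean_shift mean_shift "clustering")
+```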
+
+## How to write mlpack bindings
+
+This section describes the general structure of the automatic binding system and
+how one might write a new binding for mlpack.  After reading this section it
+should be relatively clear how one could use the provided functionality in the
+`Params` and `Timers` class along with CMake to add a binding for a new mlpack
+machine learning method.  If it is not clear, then the examples in the following
+sections should clarify.
+
+### Providing a name with `BINDING_NAME`
+
+Every binding must have the macro `BINDING_NAME` defined, specifying a name
+(without spaces, generally all lowercase) that will be used to represent the
+binding.  It is suggested to `#undef` any previous setting of `BINDING_NAME`
+just to prevent any strange error messages in case it is already defined.
+
+Here is an example that can be adapted:
+
+```c++
+#undef BINDING_NAME
+#define BINDING_NAME my_binding_name
+
+// BINDING_NAME should be defined before including mlpack_main.hpp!
+#include <mlpack/core/util/mlpack_main.hpp>
+```
+
+If this macro is not defined, compilation of the binding will fail in many ways
+with potentially obscure error messages!  (Sorry that they are bad error
+messages.  The preprocessor doesn't give us too much to work with.)
+
+### Documenting a program with macros
+
+Any mlpack binding should be documented with the `BINDING_USER_NAME()`,
+`BINDING_SHORT_DESC()`, `BINDING_LONG_DESC()`, `BINDING_EXAMPLE()` and
+`BINDING_SEE_ALSO()` macros, which are available from the
+`<mlpack/core/util/mlpack_main.hpp>` header.  The macros are of the form
+
+```c++
+BINDING_USER_NAME("program name");
+BINDING_SHORT_DESC("This is a short, two-sentence description of what the program does.");
+BINDING_LONG_DESC("This is a long description of what the program does."
+    " It might be many lines long and have lots of details about different options.");
+BINDING_EXAMPLE("This contains one example for this particular binding.\n" +
+    PROGRAM_CALL(...));
+BINDING_EXAMPLE("This contains another example for this particular binding.\n" +
+    PROGRAM_CALL(...));
+// There could be many of these "see alsos".
+BINDING_SEE_ALSO("https://en.wikipedia.org/wiki/Machine_learning");
+```
+
+The short documentation should be two sentences indicating what the program
+implements and does, and a quick overview of how it can be used and what it
+should be used for.  When writing new short documentation, it is a good idea to
+take a look at the existing documentation to get an idea of the general format.
+
+For the "see also" section, you can specify as many `SEE_ALSO()` calls as you
+see fit.  These links are used in the "see also" section of the website
+documentation for each binding, and it is very important that relevant links
+are provided (including links to other bindings).  See the `SEE_ALSO()`
+documentation for more details.
+
+Although it is possible to provide very short documentation, it is certainly
+better to provide a long description including
+
+ - what the program does
+ - a basic overview of what input and output parameters the program has
+ - at least one example invocation
+
+Examples are very important, and are probably what most users are going to
+immediately search for, instead of taking a long time to read and carefully
+consider all of the written documentation.
+
+However, it is difficult to write language-agnostic documentation.  For
+instance, in a command-line program, an output parameter `--output_file` would
+be specified on the command line as an input parameter, but in Python, the
+output parameter 'output' would actually simply be returned from the call to the
+Python function.  Therefore, we must be careful how our documentation refers to
+input and output parameters.  The following general guidelines can help:
+
+ - Always refer to output parameters as "output parameters", which is a fairly
+   close term that can be interpreted to mean both "return values" for languages
+   like Python and MATLAB and also "arguments given on the command line" for
+   command line programs.
+
+ - Use the provided `PRINT_PARAM_STRING()` macro to print the names of
+   parameters.  For instance, `PRINT_PARAM_STRING("shuffle")` will print
+   `--shuffle` for a command line program and `'shuffle'` for a Python
+   binding.  The `PRINT_PARAM_STRING()` macro also takes into account the type
+   of the parameter.
+
+ - Use the provided `PRINT_DATASET()` and `PRINT_MODEL()` macros to introduce
+   example datasets or models, which can be useful when introducing an example
+   usage of the program.  So you could write `"to run with a dataset " +
+   PRINT_DATASET("data") + "..."`.
+
+ - Use the provided `PRINT_CALL()` macro to print example invocations of the
+   program.  The first argument is the name of the program, and then the
+   following arguments should be the name of a parameter followed by the value
+   of that parameter.
+
+ - Never mention files in the documentation---files are only relevant to
+   command-line programs.  Similarly, avoid mentioning anything
+   language-specific.
+
+ - Remember that some languages give output through return values and some give
+   output using other input parameters.  So the right verbiage to use is, e.g.,
+   `the results may be saved using the PRINT_PARAM_STRING("output") parameter`,
+   and ***not*** `the results are returned through the
+   PRINT_PARAM_STRING("output") parameter`.
+
+Each of these macros (`PRINT_PARAM_STRING()`, `PRINT_DATASET()`,
+`PRINT_MODEL()`, and `PRINT_CALL()`) provides different output depending on the
+language.  Below are some examples of documentation strings and their outputs for
+different languages.  Note that the output might not be *exactly* as written or
+formatted here, but the general gist should be the same.
+
+*Input C++ (snippet):*
+
+```c++
+  "The parameter " + PRINT_PARAM_STRING("shuffle") + ", if set, will shuffle "
+  "the data before learning."
+```
+
+*Command-line program output (snippet):*
+
+```
+  The parameter '--shuffle', if set, will shuffle the data before learning.
+```
+
+*Python binding output (snippet):*
+
+```
+  The parameter 'shuffle', if set, will shuffle the data before learning.
+```
+
+*Julia binding output (snippet):*
+
+```
+  The parameter `shuffle`, if set, will shuffle the data before learning.
+```
+
+*Go binding output (snippet):*
+
+```
+  The parameter "Shuffle", if set, will shuffle the data before learning.
+```
+
+Another example:
+
+*Input C++ (snippet):*
+
+```c++
+  "The output matrix can be saved with the " + PRINT_PARAM_STRING("output") +
+  " output parameter."
+```
+
+*Command-line program output (snippet):*
+
+```
+  The output matrix can be saved with the '--output_file' output parameter.
+```
+
+*Python binding output (snippet):*
+
+```
+  The output matrix can be saved with the 'output' output parameter.
+```
+
+*Julia binding output (snippet):*
+
+```
+  The output matrix can be saved with the `output` output parameter.
+```
+
+*Go binding output (snippet):*
+
+```
+  The output matrix can be saved with the "output" output parameter.
+```
+
+And another example:
+
+*Input C++ (snippet):*
+
+```c++
+  "For example, to train a model on the dataset " + PRINT_DATASET("x") + " and "
+  "save the output model to " + PRINT_MODEL("model") + ", the following command"
+  " can be used:"
+  "\n\n" +
+  PRINT_CALL("program", "input", "x", "output_model", "model")
+```
+
+*Command-line program output (snippet):*
+
+```
+  For example, to train a model on the dataset 'x.csv' and save the output model
+  to 'model.bin', the following command can be used:
+
+  $ program --input_file x.csv --output_model_file model.bin
+```
+
+*Python binding output (snippet):*
+
+```
+  For example, to train a model on the dataset 'x' and save the output model to
+  'model', the following command can be used:
+
+  >>> output = program(input=x)
+  >>> model = output['output_model']
+```
+
+*Julia binding output (snippet):*
+
+```
+  For example, to train a model on the dataset `x` and save the output model to
+  `model`, the following command can be used:
+
+  julia> model = program(input=x)
+```
+
+*Go binding output (snippet):*
+
+```
+  For example, to train a model on the dataset "x" and save the output model to
+  "model", the following command can be used:
+
+    // Initialize optional parameters for Program().
+    param := mlpack.ProgramOptions()
+    param.Input = x
+
+    model := mlpack.Program(param)
+```
+
+And finally, a full program example:
+
+*Input C++ (full program, `random_numbers_main.cpp`):*
+
+```c++
+  // Program Name.
+  BINDING_USER_NAME("Random Numbers");
+
+  // Short description.
+  BINDING_SHORT_DESC("An implementation of Random Numbers");
+
+  // Long description.
+  BINDING_LONG_DESC(
+      "This program generates random numbers with a "
+      "variety of nonsensical techniques and example parameters.  The input "
+      "dataset, which will be ignored, can be specified with the " +
+      PRINT_PARAM_STRING("input") + " parameter.  If you would like to subtract"
+      " values from each number, specify the " +
+      PRINT_PARAM_STRING("subtract") + " parameter.  The number of random "
+      "numbers to generate is specified with the " +
+      PRINT_PARAM_STRING("num_values") + " parameter."
+      "\n\n"
+      "The output random numbers can be saved with the " +
+      PRINT_PARAM_STRING("output") + " output parameter.  In addition, a "
+      "randomly generated linear regression model can be saved with the " +
+      PRINT_PARAM_STRING("output_model") + " output parameter.");
+
+  // Example.
+  BINDING_EXAMPLE(
+      "For example, to generate 100 random numbers with 3 subtracted from them "
+      "and save the output to " + PRINT_DATASET("rand") + " and the random "
+      "model to " + PRINT_MODEL("rand_lr") + ", use the following "
+      "command:"
+      "\n\n" +
+      PRINT_CALL("random_numbers", "num_values", 100, "subtract", 3, "output",
+          "rand", "output_model", "rand_lr"));
+```
+
+*Command line output*:
+
+```
+    Random Numbers
+
+    This program generates random numbers with a variety of nonsensical
+    techniques and example parameters.  The input dataset, which will be
+    ignored, can be specified with the '--input_file' parameter.  If you would
+    like to subtract values from each number, specify the '--subtract'
+    parameter.  The number of random numbers to generate is specified with the
+    '--num_values' parameter.
+
+    The output random numbers can be saved with the '--output_file' output
+    parameter.  In addition, a randomly generated linear regression model can be
+    saved with the '--output_model_file' output parameter.
+
+    For example, to generate 100 random numbers with 3 subtracted from them and
+    save the output to 'rand.csv' and the random model to 'rand_lr.bin', use the
+    following command:
+
+    $ random_numbers --num_values 100 --subtract 3 --output_file rand.csv
+      --output_model_file rand_lr.bin
+```
+
+*Python binding output*:
+
+```
+    Random Numbers
+
+    This program generates random numbers with a variety of nonsensical
+    techniques and example parameters.  The input dataset, which will be
+    ignored, can be specified with the 'input' parameter.  If you would like to
+    subtract values from each number, specify the 'subtract' parameter.  The
+    number of random numbers to generate is specified with the 'num_values'
+    parameter.
+
+    The output random numbers can be saved with the 'output' output parameter.
+    In addition, a randomly generated linear regression model can be saved with
+    the 'output_model' output parameter.
+
+    For example, to generate 100 random numbers with 3 subtracted from them and
+    save the output to 'rand' and the random model to 'rand_lr', use the
+    following command:
+
+    >>> output = random_numbers(num_values=100, subtract=3)
+    >>> rand = output['output']
+    >>> rand_lr = output['output_model']
+```
+
+*Julia binding output:*
+
+```
+    Random Numbers
+
+    This program generates random numbers with a variety of nonsensical
+    techniques and example parameters.  The input dataset, which will be
+    ignored, can be specified with the `input` parameter.  If you would like to
+    subtract values from each number, specify the `subtract` parameter.  The
+    number of random numbers to generate is specified with the `num_values`
+    parameter.
+
+    The output random numbers can be saved with the `output` output parameter.
+    In addition, a randomly generated linear regression model can be saved with
+    the `output_model` output parameter.
+
+    For example, to generate 100 random numbers with 3 subtracted from them and
+    save the output to `rand` and the random model to `rand_lr`, use the
+    following command:
+
+    ```julia
+    julia> rand, rand_lr = random_numbers(num_values=100, subtract=3)
+    ```
+```
+
+*Go binding output:*
+
+```
+    Random Numbers
+
+    This program generates random numbers with a variety of nonsensical
+    techniques and example parameters.  The input dataset, which will be
+    ignored, can be specified with the "Input" parameter.  If you would like to
+    subtract values from each number, specify the "Subtract" parameter.  The
+    number of random numbers to generate is specified with the "NumValues"
+    parameter.
+
+    The output random numbers can be saved with the "output" output parameter.
+    In addition, a randomly generated linear regression model can be saved with
+    the "outputModel" output parameter.
+
+    For example, to generate 100 random numbers with 3 subtracted from them and
+    save the output to "rand" and the random model to "randLr", use the
+    following command:
+
+    // Initialize optional parameters for RandomNumbers().
+    param := mlpack.RandomNumbersOptions()
+    param.NumValues = 100
+    param.Subtract = 3
+
+    rand, randLr := mlpack.RandomNumbers(param)
+```
+
+### Defining parameters for a program
+
+Several macros can be used after the `BINDING_LONG_DESC()` and
+`BINDING_EXAMPLE()` definitions to define the parameters that can be specified
+for a given mlpack program.  These macros all follow the same general pattern:
+the name of the macro specifies the type of the parameter, whether or not the
+parameter is required, and whether the parameter is an input or output
+parameter.  The arguments to the macro then specify the name, description, and
+sometimes the single-character alias and the default value of the parameter.
+
+To give a flavor of how these definitions look, the definition
+
+```c++
+PARAM_STRING_IN("algorithm", "The algorithm to use: 'svd' or 'blah'.", "a");
+```
+
+will define a string input parameter `algorithm` (referenced as `--algorithm`
+from the command-line or `'algorithm'` from Python) with the description `The
+algorithm to use: 'svd' or 'blah'.`  The single-character alias `-a` can be used
+from a command-line program (but means nothing in Python).
+
+There are numerous different macros that can be used:
+
+ - `PARAM_FLAG()` - boolean flag parameter
+ - `PARAM_INT_IN()` - integer input parameter
+ - `PARAM_INT_OUT()` - integer output parameter
+ - `PARAM_DOUBLE_IN()` - double input parameter
+ - `PARAM_DOUBLE_OUT()` - double output parameter
+ - `PARAM_STRING_IN()` - string input parameter
+ - `PARAM_STRING_OUT()` - string output parameter
+ - `PARAM_MATRIX_IN()` - double-valued matrix (`arma::mat`) input parameter
+ - `PARAM_MATRIX_OUT()` - double-valued matrix (`arma::mat`) output parameter
+ - `PARAM_UMATRIX_IN()` - size_t-valued matrix (`arma::Mat<size_t>`) input
+       parameter
+ - `PARAM_UMATRIX_OUT()` - size_t-valued matrix (`arma::Mat<size_t>`) output
+       parameter
+ - `PARAM_TMATRIX_IN()` - transposed double-valued matrix (`arma::mat`) input
+       parameter
+ - `PARAM_TMATRIX_OUT()` - transposed double-valued matrix (`arma::mat`) output
+       parameter
+ - `PARAM_MATRIX_AND_INFO_IN()` - matrix with categoricals input parameter
+       (`std::tuple<data::DatasetInfo, arma::mat>`)
+ - `PARAM_COL_IN()` - double-valued column vector (`arma::vec`) input parameter
+ - `PARAM_COL_OUT()` - double-valued column vector (`arma::vec`) output
+       parameter
+ - `PARAM_UCOL_IN()` - size_t-valued column vector (`arma::Col<size_t>`) input
+       parameter
+ - `PARAM_UCOL_OUT()` - size_t-valued column vector (`arma::Col<size_t>`) output
+       parameter
+ - `PARAM_ROW_IN()` - double-valued row vector (`arma::rowvec`) input parameter
+ - `PARAM_ROW_OUT()` - double-valued row vector (`arma::rowvec`) output
+       parameter
+ - `PARAM_VECTOR_IN()` - `std::vector` input parameter
+ - `PARAM_VECTOR_OUT()` - `std::vector` output parameter
+ - `PARAM_MODEL_IN()` - serializable model input parameter
+ - `PARAM_MODEL_OUT()` - serializable model output parameter
+
+And for input parameters, the parameter may also be required:
+
+ - `PARAM_INT_IN_REQ()`
+ - `PARAM_DOUBLE_IN_REQ()`
+ - `PARAM_STRING_IN_REQ()`
+ - `PARAM_MATRIX_IN_REQ()`
+ - `PARAM_UMATRIX_IN_REQ()`
+ - `PARAM_TMATRIX_IN_REQ()`
+ - `PARAM_VECTOR_IN_REQ()`
+ - `PARAM_MODEL_IN_REQ()`
+
+See the source documentation for each macro to read further details.  Note also
+that not every possible combination of `IN`, `OUT`, and `REQ` is
+available---output options cannot be required, and some combinations simply have
+not been added because they have not been needed.
+
+The `PARAM_MODEL_IN()` and `PARAM_MODEL_OUT()` macros are used to serialize
+mlpack models.  These could be used, for instance, to allow the user to save a
+trained model (like a linear regression model) or load an input model.  The
+first parameter to the `PARAM_MODEL_IN()` or `PARAM_MODEL_OUT()` macro should be
+the C++ type of the model to be serialized; this type *must* have a function
+`template<typename Archive> void serialize(Archive&)` (i.e. the type must be
+serializable via cereal).  For example, to allow a user to specify an input
+model of type `LinearRegression`, the following definition could be used:
+
+```c++
+PARAM_MODEL_IN(LinearRegression, "input_model", "The input model to be used.",
+    "i");
+```
+
+Then, the user will be able to specify their model from the command-line as
+`--input_model_file` and from Python using the `input_model` option to the
+generated binding.
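+
+The output direction is analogous.  As a minimal sketch (mirroring the
+`random_numbers` example later in this section), an output model parameter
+could be defined as:
+
+```c++
+PARAM_MODEL_OUT(LinearRegression, "output_model", "The trained model.", "M");
+```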
+
+From the command line, matrix-type and model-type options (both input and
+output) are loaded from or saved to the specified file.  This means that `_file`
+is appended to the name of the parameter; so if the parameter name is `data` and
+it is of a matrix or model type, then the name that the user will specify on the
+command line will be `--data_file`.  This displayed parameter name change *only*
+occurs with matrix and model type parameters for command-line programs.
+
+The `PARAM_MATRIX_AND_INFO_IN()` macro defines a categorical matrix parameter
+(more specifically, a matrix type that can support categorical columns).  From
+the C++ program side, this means that the parameter type is
+`std::tuple<data::DatasetInfo, arma::mat>`.  From the user side, for a
+command-line program, this means that the user will pass the filename of a
+dataset that can have categorical features, such as an ARFF dataset.  For a
+Python program, the user may pass a Pandas matrix with categorical columns.
+When the program is run, the input that the user gives will be processed and the
+`data::DatasetInfo` object will be filled with the dimension types and the
+`arma::mat` object will be filled with the data itself.
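+
+As an illustrative sketch (the parameter name and description here are
+hypothetical), such a parameter could be defined with:
+
+```c++
+PARAM_MATRIX_AND_INFO_IN("data", "Input dataset, which may contain "
+    "categorical features.", "d");
+```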
+
+To give some examples, the parameter definitions from the example
+`random_numbers` program in the previous section are shown below.
+
+```c++
+PARAM_MATRIX_IN("input", "The input matrix that will be ignored.", "i");
+PARAM_DOUBLE_IN("subtract", "The value to subtract from each parameter.", "s",
+    0.0); // Default value of 0.0.
+PARAM_INT_IN("num_samples", "The number of samples to generate.", "n", 100);
+
+PARAM_MATRIX_OUT("output", "The output matrix of random samples.", "o");
+PARAM_MODEL_OUT(LinearRegression, "output_model", "The randomly generated "
+    "linear regression output model.", "M");
+```
+
+Note that even the parameter documentation strings must be a little bit
+agnostic to the binding type, because the command-line interface is so
+different from the Python interface that the user sees.
+
+### Using `Params` in a `BINDING_FUNCTION()` function
+
+mlpack's `util::Params` class provides a unified abstract interface for getting
+input from and providing output to users without needing to consider the
+language (command-line, Python, MATLAB, etc.) that the user is running the
+program from.  This means that after the `BINDING_LONG_DESC()` and
+`BINDING_EXAMPLE()` macros and the `PARAM_*()` macros have been defined, a
+language-agnostic `void BINDING_FUNCTION(util::Params& params, util::Timers&
+timers)` function can be written.  This function can then perform the actual
+computation that the entire program is meant to perform.
+
+Inside of a `BINDING_FUNCTION()` function, the given `util::Params` object can
+be used to access input parameters and set output parameters.  There are two
+main functions for this, plus a utility printing function:
+
+ - `params.Get<T>()` - get a reference to a parameter
+ - `params.Has()` - returns true if the user specified the parameter
+ - `params.GetPrintable<T>()` - returns a string representing the value of the
+      parameter
+
+So, to print `hello` if the user specified the `print_hello` parameter, the
+following code could be used:
+
+```c++
+if (params.Has("print_hello"))
+  std::cout << "Hello!" << std::endl;
+else
+  std::cout << "No greetings for you!" << std::endl;
+```
+
+To access a string that a user passed in to the `string` parameter, the
+following code could be used:
+
+```c++
+const std::string& str = params.Get<std::string>("string");
+```
+
+Matrix types are accessed in the same way:
+
+```c++
+arma::mat& matrix = params.Get<arma::mat>("matrix");
+```
+
+Similarly, model types can be accessed.  If a `LinearRegression` model was
+specified by the user as the parameter `model`, the following code can access
+the model:
+
+```c++
+LinearRegression& lr = params.Get<LinearRegression>("model");
+```
+
+Matrices with categoricals are a little trickier to access since the C++
+parameter type is `std::tuple<data::DatasetInfo, arma::mat>`.  The example below
+creates references to both the `DatasetInfo` and matrix objects, assuming the
+user has passed a matrix with categoricals as the `matrix` parameter.
+
+```c++
+using namespace mlpack;
+
+typedef std::tuple<data::DatasetInfo, arma::mat> TupleType;
+data::DatasetInfo& di = std::get<0>(params.Get<TupleType>("matrix"));
+arma::mat& matrix = std::get<1>(params.Get<TupleType>("matrix"));
+```
+
+These two functions, `params.Get<T>()` and `params.Has()`, can be used to
+write an entire program.  The third function, `params.GetPrintable()`, can be
+used to help provide useful output in a program.  Typically, this function
+should be used if you want to provide some
+kind of error message about a matrix or model parameter, but want to avoid
+printing the matrix itself.  For instance, printing a matrix parameter with
+`params.GetPrintable()` will print the filename for a command-line binding or
+the size of a matrix for a Python binding.  `params.GetPrintable()` for a model
+parameter will print the filename for the model for a command-line binding or a
+simple string representing the type of the model for a Python binding.
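+
+As a minimal sketch, an error message that refers to a matrix parameter
+without printing its contents might look like this (the size check itself is
+just an example):
+
+```c++
+if (params.Get<arma::mat>("input").n_cols < 2)
+{
+  Log::Fatal << "The input dataset "
+      << params.GetPrintable<arma::mat>("input")
+      << " has fewer than two points!" << std::endl;
+}
+```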
+
+Putting all of these ideas together, here is the `BINDING_FUNCTION()` function
+that could be created for the `random_numbers` program from earlier sections.
+
+```c++
+// BINDING_NAME should be defined here: ...
+
+#include <mlpack/core/util/mlpack_main.hpp>
+
+// BINDING_USER_NAME(), BINDING_SHORT_DESC(), BINDING_LONG_DESC(),
+// BINDING_EXAMPLE(), BINDING_SEE_ALSO() and PARAM_*() definitions should go
+// here: ...
+
+using namespace mlpack;
+
+void BINDING_FUNCTION(util::Params& params, util::Timers& timers)
+{
+  // If the user passed an input matrix, tell them that we'll be ignoring it.
+  if (params.Has("input"))
+  {
+    // Print the filename the user passed, if a command-line binding, or the
+    // size of the matrix passed, if a Python binding.
+    Log::Warn << "The input matrix "
+        << params.GetPrintable<arma::mat>("input") << " is ignored!"
+        << std::endl;
+  }
+
+  // Get the number of samples and also the value we should subtract.
+  const size_t numSamples = (size_t) params.Get<int>("num_values");
+  const double subtractValue = params.Get<double>("subtract");
+
+  // Create the random matrix (1-dimensional).
+  arma::mat output(1, numSamples, arma::fill::randu);
+  output -= subtractValue;
+
+  // Save the output matrix if the user wants.
+  if (params.Has("output"))
+    params.Get<arma::mat>("output") = std::move(output); // Avoid copy.
+
+  // Did the user request a random linear regression model?
+  if (params.Has("output_model"))
+  {
+    LinearRegression lr;
+    lr.Parameters().randu(10); // 10-dimensional (arbitrary).
+    lr.Lambda() = 0.0;
+    lr.Intercept() = false; // No intercept term.
+
+    params.Get<LinearRegression>("output_model") = std::move(lr);
+  }
+}
+```
+
+### More documentation on using `util::Params`
+
+More documentation for the `util::Params` class can either be found in the
+source code for `util::Params`, or by reading the existing mlpack bindings.
+These can be found in the `src/mlpack/methods/` folders, by finding the
+`_main.cpp` files.  For instance,
+`src/mlpack/methods/neighbor_search/knn_main.cpp` is the k-nearest-neighbor
+search program definition.
+
+## Structure of IO module and associated macros
+
+This section describes the internal functionality of the `IO` module, which
+stores all known parameter sets, and the associated macros.  If you are only
+interested in writing mlpack programs, this section is probably not worth
+reading.
+
+There are eight main components involved with mlpack bindings:
+
+ - the `IO` module, a thread-safe singleton class that stores parameter
+    information
+ - the `BINDING_FUNCTION()` function that defines the functionality of the
+    binding
+ - the `BINDING_NAME()` macro that defines the binding name
+ - the `BINDING_SHORT_DESC()` macro that defines the short description
+ - the `BINDING_LONG_DESC()` macro that defines the long description
+ - (optional) the `BINDING_EXAMPLE()` macro that defines example usages
+ - (optional) the `BINDING_SEE_ALSO()` macro that defines "see also" links
+ - the `PARAM_*()` macros that define parameters for the binding
+
+The `mlpack::IO` module is a singleton class that stores, at runtime, the
+binding name, the documentation, and the parameter information and values for
+any bindings available in the translation unit.  When the binding is called, the
+`mlpack::IO` class instantiates a `util::Params` and `util::Timers` object,
+populating them with the correct options for the given binding, then calls
+`BINDING_FUNCTION()` with those instantiated objects.
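+
+In rough outline (the exact call sequence lives in the per-language binding
+code, and this sketch omits error handling), that flow looks something like:
+
+```c++
+// Look up the registered parameter set for this binding...
+mlpack::util::Params params = mlpack::IO::Parameters("random_numbers");
+mlpack::util::Timers timers;
+// ...fill 'params' with the values the user gave, then run the binding.
+BINDING_FUNCTION(params, timers);
+```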
+
+In order to do this, each parameter and the program documentation must make
+themselves known to the `IO` singleton.  This is accomplished by having the
+`BINDING_USER_NAME()`, `BINDING_SHORT_DESC()`, `BINDING_LONG_DESC()`,
+`BINDING_EXAMPLE()`, `BINDING_SEE_ALSO()` and `PARAM_*()` macros declare global
+variables that, in their constructors, register themselves with the `IO`
+singleton.
+
+ * The `BINDING_USER_NAME()` macro declares an object of type
+    `mlpack::util::BindingName`.
+ * The `BINDING_SHORT_DESC()` macro declares an object of type
+    `mlpack::util::ShortDescription`.
+ * The `BINDING_LONG_DESC()` macro declares an object of type
+    `mlpack::util::LongDescription`.
+ * The `BINDING_EXAMPLE()` macro declares an object of type
+    `mlpack::util::Example`.
+ * The `BINDING_SEE_ALSO()` macro declares an object of type
+    `mlpack::util::SeeAlso`.
+ * The `BindingName` class constructor calls `IO::AddBindingName()` in order
+    to register the given program name.
+ * The `ShortDescription` class constructor calls `IO::AddShortDescription()`
+    in order to register the given short description.
+ * The `LongDescription` class constructor calls `IO::AddLongDescription()` in
+    order to register the given long description.
+ * The `Example` class constructor calls `IO::AddExample()` in order to
+    register the given example.
+ * The `SeeAlso` class constructor calls `IO::AddSeeAlso()` in order to
+    register the given see-also link.
+
+All of those macro calls use whatever the value of the `BINDING_NAME` macro is
+at the time of instantiation.  This is why it is important that `BINDING_NAME`
+is set properly at the time `mlpack_main.hpp` is included and before any
+options are defined.
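+
+A minimal sketch of that required ordering in a `*_main.cpp` file (the binding
+name here is illustrative):
+
+```c++
+// Define the binding name *before* including mlpack_main.hpp.
+#define BINDING_NAME random_numbers
+#include <mlpack/core/util/mlpack_main.hpp>
+
+// Only now should BINDING_USER_NAME(), PARAM_*(), and friends appear.
+BINDING_USER_NAME("Random Numbers");
+```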
+
+The `PARAM_*()` macros declare an object that will, in its constructor, call
+`IO::AddParameter()` to register that parameter for the current binding (again
+specified by the `BINDING_NAME` macro's value) with the `IO` singleton.  The
+specific type of that object will depend on the binding type being used.
+
+The `IO::AddParameter()` function takes the name of the binding it is for and an
+`mlpack::util::ParamData` object as its input.  This `ParamData` object has a
+number of fields that must be set to properly describe the parameter.  Each of
+the fields is documented and probably self-explanatory, but three fields deserve
+further explanation:
+
+ - the `std::string tname` member is used to encode the true type of the
+   parameter---which is not known by the `IO` singleton at runtime.  This should
+   be set to `TYPENAME(T)` where `T` is the type of the parameter.
+
+ - the `ANY value` member (where `ANY` is whatever type was chosen in case
+   `std::any` is not available) is used to hold the actual value of the
+   parameter.  Typically this will simply be the parameter held by an `ANY`
+   object, but for some types it may be more complex.  For instance, for a
+   command-line matrix option, the `value` parameter will actually hold a tuple
+   containing both the filename and the matrix itself.
+
+ - the `std::string cppType` should be a string containing the type as seen in
+   C++ code.  Typically this can be encoded by stringifying a `PARAM_*()` macro
+   argument.
+
+Thus, the global object defined by the `PARAM_*()` macro must turn its arguments
+into a fully specified `ParamData` object and then call `IO::AddParameter()`
+with it.
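+
+A hedged sketch of what that might look like for a `double` parameter (the
+field names used here are the ones described above; the real macros fill in
+several more fields and handle defaults, aliases, and so forth):
+
+```c++
+util::ParamData d;
+d.name = "subtract";
+d.desc = "The value to subtract from each parameter.";
+d.tname = TYPENAME(double);  // Encodes the true C++ type.
+d.cppType = "double";        // Stringified type, as seen in C++ code.
+d.value = ANY(0.0);          // The default value, held in an ANY object.
+IO::AddParameter("random_numbers", std::move(d));
+```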
+
+With different binding types, different behavior is often required for the
+`params.Get<T>()`, `params.Has()`, and `params.GetPrintable<T>()` functions.  In
+order to handle this, the `IO` singleton also holds a function pointer map, so
+that a given type of option can call specific functionality for a certain task.
+Given a `util::Params` object (which can be obtained with
+`IO::Parameters("binding_name")`), this function map is accessible as
+`params.functionMap`, and is not meant to be used by users, but instead by
+people writing binding types.
+
+Each function in the map must have signature
+
+```c++
+void MapFunction(const util::ParamData& d,
+                 const void* input,
+                 void* output);
+```
+
+The use of `void` pointers allows any type to be specified as input or output to
+the function without changing the signature for the map.  The `IO` function map
+is of type
+
+```c++
+std::map<std::string, std::map<std::string,
+    void (*)(const util::ParamData&, const void*, void*)>>
+```
+
+and the first map key is the typename (`tname`) of the parameter, and the second
+map key is the string name of the function.  For instance, calling
+
+```c++
+const util::ParamData& d = params.Parameters()["param"];
+params.functionMap[d.tname]["GetParam"](d, input, output);
+```
+
+will call the `GetParam()` function for the type of the `"param"` parameter.
+Examples are probably the easiest way to understand how this functionality
+works; see the `params.Get<T>()` source to see how this might be used.
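+
+As a rough sketch, a `GetParam` implementation for a simple type `T` (assuming
+`ANY` is `std::any`; the real versions in the binding directories are more
+involved, especially for matrix and model types) could look like:
+
+```c++
+template<typename T>
+void GetParam(const util::ParamData& d, const void* /* input */, void* output)
+{
+  // d.value holds the parameter itself; hand back a pointer to it.
+  *((T**) output) = const_cast<T*>(std::any_cast<T>(&d.value));
+}
+```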
+
+The `IO` singleton expects the following functions to be defined in the function
+map for each type:
+
+ - `GetParam` -- return a pointer to the parameter in `output`.
+ - `GetPrintableParam` -- return a pointer to a string description of the
+      parameter in `output`.
+
+If these functions are properly defined, then the `IO` module will work
+correctly.  Other functions may also be defined; these may be used by other
+parts of the binding infrastructure for different languages.
+
+## Command-line program bindings
+
+This section describes the internal functionality of the command-line program
+binding generator.  If you are only interested in writing mlpack programs, this
+section probably is not worth reading.  This section is worth reading only if
+you want to know the specifics of how the `BINDING_FUNCTION()` function and
+macros get turned into a fully working command-line program.
+
+The code for the command-line bindings is found in `src/mlpack/bindings/cli`.
+
+### The `BINDING_FUNCTION()` definition
+
+Any command-line program must be compiled with the `BINDING_TYPE` macro
+set to the value `BINDING_TYPE_CLI`.  This is handled by the CMake macro
+`add_cli_executable()`.
+
+When `BINDING_TYPE` is set to `BINDING_TYPE_CLI`, the following is set in
+`src/mlpack/core/util/mlpack_main.hpp`, which must be included by every mlpack
+binding:
+
+ - The options defined by `PARAM_*()` macros are of type
+   `mlpack::bindings::cli::CLIOption`.
+
+ - The parameter and value printing macros for `BINDING_LONG_DESC()`
+   and `BINDING_EXAMPLE()` are set:
+   * The `PRINT_PARAM_STRING()` macro is defined as
+     `mlpack::bindings::cli::ParamString()`.
+   * The `PRINT_DATASET()` macro is defined as
+     `mlpack::bindings::cli::PrintDataset()`.
+   * The `PRINT_MODEL()` macro is defined as
+     `mlpack::bindings::cli::PrintModel()`.
+   * The `PRINT_CALL()` macro is defined as
+     `mlpack::bindings::cli::ProgramCall()`.
+
+ - The function `int main()` is defined as:
+
+```c++
+int main(int argc, char** argv)
+{
+  // Parse the command-line options; put them into CLI.
+  mlpack::util::Params params =
+      mlpack::bindings::cli::ParseCommandLine(argc, argv);
+  // Create a new timer object for this call.
+  mlpack::util::Timers timers;
+  timers.Enabled() = true;
+  mlpack::Timer::EnableTiming();
+
+  // A "total_time" timer is run by default for each mlpack program.
+  timers.Start("total_time");
+  BINDING_FUNCTION(params, timers);
+  timers.Stop("total_time");
+
+  // Print output options, print verbose information, save model parameters,
+  // clean up, and so forth.
+  mlpack::bindings::cli::EndProgram(params, timers);
+}
+```
+
+Thus any mlpack command-line binding first processes the command-line arguments
+with `mlpack::bindings::cli::ParseCommandLine()`, then runs the binding with
+`BINDING_FUNCTION()`, then cleans up with `mlpack::bindings::cli::EndProgram()`.
+
+The `ParseCommandLine()` function reads the input parameters and sets the
+values in `IO`.  For matrix-type and model-type parameters, this reads the
+filenames from the command-line, but does not load the matrix or model.  Instead
+the matrix or model is loaded the first time it is accessed with
+`params.Get<T>()`.
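+
+So, inside of `BINDING_FUNCTION()`, a line like the following is what actually
+triggers the file load, the first time it runs (the parameter name here is
+illustrative):
+
+```c++
+// No file I/O has happened for 'input' up to this point; the matrix given
+// via '--input_file' is loaded during this first call to Get().
+arma::mat& dataset = params.Get<arma::mat>("input");
+```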
+
+The `--help` parameter is handled by the `mlpack::bindings::cli::PrintHelp()`
+function.
+
+At the end of program execution, the `mlpack::bindings::cli::EndProgram()`
+function is called.  This writes any output matrix or model parameters to disk,
+and prints the program parameters and timers if `--verbose` was given.
+
+### Matrix and model parameter handling
+
+For command line bindings, the matrix, model, and matrix with categorical type
+parameters all require special handling, since it is not possible to pass a
+matrix of any reasonable size or a model on the command line directly.
+Therefore for a matrix or model parameter, the user specifies the file
+containing that matrix or model parameter.  If the parameter is an input
+parameter, then the file is loaded when `params.Get<T>()` is called.  If the
+parameter is an output parameter, then the matrix or model is saved to the file
+when `EndProgram()` is called.
+
+The actual implementation of this is that the `ANY value` member of the
+`ParamData` struct does not hold the model or the matrix, but instead a
+`std::tuple` containing both the matrix or the model, and the filename
+associated with that matrix or model.
+
+This means that functions like `params.Get<T>()` and `params.GetPrintable<T>()`
+(and all of the other associated functions in the function map) must have
+special handling for matrix or model types.  See those implementations for more
+details---the special handling is enforced via SFINAE.
+
+### Parsing the command line
+
+The `ParseCommandLine()` function uses `CLI11` to read the values from the
+command line into the `ParamData` structs held by the `IO` singleton.
+
+In order to set up `CLI11`---and to keep its headers from needing to be included
+by the rest of the library---the code loops over each parameter known by the
+`IO` singleton and calls the `AddToPO` function from the function map.  This in
+turn calls the necessary functions to register a given parameter with `CLI11`,
+and once all parameters have been registered, the facilities provided by `CLI11`
+are used to parse the command line input properly.
+
+## Python bindings
+
+This section describes the internal functionality of the mlpack Python binding
+generator.  If you are only interested in writing new bindings or building the
+bindings, this section is probably not worth reading.  But if you are interested
+in the internal working of the Python binding generator, then this section is
+for you.
+
+The Python bindings are significantly more complex than the command line
+bindings because we cannot just compile directly to a finished product.  Instead
+we need a multi-stage compilation:
+
+ - We must generate a `setup.py` file that can be used to compile the bindings.
+ - We must generate the `.pyx` (Cython) bindings for each program.
+ - Then we must build each `.pyx` into a `.so` that is loadable from Python.
+ - We must also test the Python bindings.
+
+This is done with a combination of C++ code to generate the `.pyx` bindings,
+CMake to run the actual compilation and generate the `setup.py` file, some
+utility Python functions, and tests written in both Python and C++.  This code
+is primarily contained in `src/mlpack/bindings/python/`.
+
+### Passing matrices to/from Python
+
+The standard Python matrix library is numpy, so mlpack bindings should accept
+numpy matrices as input.  Fortunately, numpy Cython bindings already exist,
+which make it easy to convert from a numpy object to an Armadillo object without
+copying any data.  This code can be found in
+`src/mlpack/bindings/python/mlpack/arma_numpy.pyx`, and is used by the Python
+`params.Get<T>()` functionality.
+
+mlpack also supports categorical matrices; in Python, the typical way of
+representing matrices with categorical features is with Pandas.  Therefore,
+mlpack also accepts Pandas matrices, and if any of the Pandas matrix dimensions
+are categorical, these are properly encoded.  The function
+`to_matrix_with_info()` from `mlpack/bindings/python/mlpack/matrix_utils.py` is
+used to perform this conversion.
+
+### Passing model parameters to/from Python
+
+We use (or abuse) Cython functionality in order to give the user a model object
+that they can use in their Python code.  However, we do not want to (or have the
+infrastructure to) write bindings for every method that a serializable model
+class might support; therefore, we only desire to return a memory pointer to the
+model to the user.
+
+In this way, a user that receives a model from an output parameter can then
+reuse the model as an input parameter to another binding (or the same binding).
+
+To return a memory pointer we have to define a Cython class in the following
+way (this example is taken from the perceptron binding):
+
+```py
+cdef extern from "</home/ryan/src/mlpack-rc/src/mlpack/methods/perceptron/perceptron_main.cpp>" nogil:
+  cdef int mlpack_perceptron(Params, Timers) nogil except +RuntimeError
+
+  cdef cppclass PerceptronModel:
+    PerceptronModel() nogil
+
+
+cdef class PerceptronModelType:
+  cdef PerceptronModel* modelptr
+
+  def __cinit__(self):
+    self.modelptr = new PerceptronModel()
+
+  def __dealloc__(self):
+    del self.modelptr
+```
+
+This class definition is generated automatically as part of the `.pyx` file
+generation.
+
+### CMake generation of `setup.py`
+
+A boilerplate `setup.py` file can be found in
+`src/mlpack/bindings/python/setup.py.in`.  This will be configured by CMake to
+produce the final `setup.py` file, but in order to do this, a list of the `.pyx`
+files to be compiled must be gathered.
+
+Therefore, the `add_python_binding()` macro is defined in
+`src/mlpack/bindings/python/CMakeLists.txt`.  This adds the given binding to the
+`MLPACK_PYXS` variable, which is then inserted into `setup.py` as part of the
+`configure_file()` step in `src/mlpack/CMakeLists.txt`.
+
+### Generation of `.pyx` files
+
+A binding named `program` is built into a program called
+`generate_pyx_program` (this is a CMake target, so you can build these
+individually if you like).  The file
+`src/mlpack/bindings/python/generate_pyx.cpp.in` is configured by CMake to set
+the name of the program and the `*_main.cpp` file to include correctly, then
+the `mlpack::bindings::python::PrintPYX()` function is called by the program.
+The `PrintPYX()` function uses the parameters that have been set in the `IO`
+singleton by the `BINDING_USER_NAME()`, `BINDING_SHORT_DESC()`,
+`BINDING_LONG_DESC()`, `BINDING_EXAMPLE()`, `BINDING_SEE_ALSO()` and `PARAM_*()`
+macros in order to actually print a fully-working `.pyx` file that can be
+compiled.  The file has several sections:
+
+ - Python imports (numpy/pandas/cython/etc.)
+ - Cython imports of C++ utility functions and Armadillo functionality
+ - Cython imports of any necessary serializable model types
+ - Definitions of classes for serializable model types
+ - The binding function definition
+ - Documentation: input and output parameters
+ - The call to `BINDING_FUNCTION()`
+ - Handling of output functionality
+ - Return of output parameters
+
+Any output parameters for Python bindings are returned in a dict containing
+named elements.
+
+### Building the `.pyx` files
+
+After building the `generate_pyx_program` target, the `build_pyx_program` target
+is built as a dependency of the `python` target.  This simply takes the
+generated `.pyx` file and uses Python setuptools to compile this to a Python
+binding.
+
+### Testing the Python bindings
+
+In addition to the C++ tests we have implemented for each binding, we also have
+tests from Python that ensure that we can successfully transfer parameter values
+from Python to C++ and return output correctly.
+
+The tests are in `src/mlpack/bindings/python/tests/` and test both the actual
+bindings and also the auxiliary Python code included in
+`src/mlpack/bindings/python/mlpack/`.
+
+## Adding new binding types
+
+Adding a new binding type to mlpack is fairly straightforward once the general
+structure of the `IO` singleton and the function map that `IO` uses is
+understood.  For each different language that bindings are desired for, the
+route to a solution will be quite different---so it is hard to provide
+any general guidance for how to make new bindings that will be applicable to
+each language.
+
+In general, the first thing to handle will be how matrices are passed back and
+forth between the target language.  Typically this might mean getting the memory
+address of an input matrix and wrapping an `arma::mat` object around that memory
+address.  This can be handled in the `GetParam()` function that is part of the
+`IO` singleton function map; see `get_param.hpp` for both the `IO` and Python
+bindings for an example (in `src/mlpack/bindings/cli/` and
+`src/mlpack/bindings/python/`).
+
+Serialization of models is also a tricky consideration; in some languages you
+will be able to pass a pointer to the model itself.  This is generally
+best---users should not expect to be able to manipulate the model in the target
+language, but they should expect that they can pass a model back and forth
+without paying a runtime penalty.  So, for example, serializing a model using a
+cereal text archive and then returning the string that represents the model
+is not acceptable, because that string can be extremely large and the time it
+takes to decode the model can be very large.
+
+The strategy of generating a binding definition for the target language, like
+what is done with Python, can be a useful strategy that should be considered.
+If this is the route that is desired, a large amount of CMake boilerplate may be
+necessary.  The Python CMake configuration can be referred to as an example, but
+probably a large amount of adaptation to other languages will be necessary.
+
+Lastly, when adding a new language, be sure to make sure it works with the
+Markdown documentation generator.  In order to make this happen, you will need
+to modify all of the `add_markdown_docs()` calls in
+`src/mlpack/methods/CMakeLists.txt` to contain the name of the language you have
+written a binding for.  You will also need to modify every function in
+`src/mlpack/bindings/markdown/print_doc_functions_impl.hpp` to correctly call
+out to the corresponding function for the language that you have written
+bindings for.
diff -pruN 3.4.2-7/doc/developer/elemtype.md 4.0.1-1/doc/developer/elemtype.md
--- 3.4.2-7/doc/developer/elemtype.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/developer/elemtype.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,36 @@
+# The ElemType policy in mlpack
+
+mlpack algorithms should be as generic as possible.  Often this means
+allowing arbitrary metrics or kernels to be used, but this also means allowing
+any type of data point to be used.  This means that mlpack classes should
+support `float`, `double`, and other observation types.  Some algorithms
+support this through the use of a `MatType` template parameter; others will
+have their own template parameter, `ElemType`.
+
+The `ElemType` template parameter can take any value that can be used by
+Armadillo (or, specifically, classes like `arma::Mat<>` and others); this
+encompasses the types
+
+ - `double`
+ - `float`
+ - `int`
+ - `unsigned int`
+ - `std::complex<double>`
+ - `std::complex<float>`
+
+and other primitive numeric types.  Note that Armadillo does not support some
+integer types for functionality such as matrix decompositions or other more
+advanced linear algebra.  This means that when these integer types are used,
+some algorithms may fail with Armadillo error messages indicating that those
+types cannot be used.
+
+*Note*: if the class has a `MatType` template parameter, `ElemType` can be
+easily defined as below:
+
+```c++
+typedef typename MatType::elem_type ElemType;
+```
+
+and otherwise a template parameter with the name `ElemType` can be used.  It is
+generally a good idea to expose the element type somehow for use by other
+classes.
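+
+For instance, here is a minimal sketch of a class that exposes its element
+type for other classes to use (the class and member names are illustrative):
+
+```c++
+template<typename MatType = arma::mat>
+class ExampleMethod
+{
+ public:
+  // Expose the element type of the matrix type.
+  typedef typename MatType::elem_type ElemType;
+
+  // Scalars associated with the data can now use ElemType.
+  ElemType Sum(const MatType& m) const { return arma::accu(m); }
+};
+```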
diff -pruN 3.4.2-7/doc/developer/iodoc.md 4.0.1-1/doc/developer/iodoc.md
--- 3.4.2-7/doc/developer/iodoc.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/developer/iodoc.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,214 @@
+# Writing an mlpack binding
+
+This tutorial gives some simple examples of how to write an mlpack binding that
+can be compiled for multiple languages.  These bindings make up the core of how
+most users will interact with mlpack.
+
+mlpack provides the following:
+
+ - `mlpack::Log`, for debugging / informational / warning / fatal output
+ - a `util::Params` object, for parsing command line options or other options
+ - a `util::Timers` object, for collecting and displaying timing information
+
+Each of those classes is well-documented, and that documentation in the source
+code should be consulted for further reference.
+
+First, we'll discuss the logging infrastructure, which is useful for giving
+output that users can see.
+
+## Simple logging example
+
+mlpack has four logging levels:
+
+ - `Log::Debug`
+ - `Log::Info`
+ - `Log::Warn`
+ - `Log::Fatal`
+
+Output to `Log::Debug` does not show (and has no performance penalty) when
+mlpack is compiled without debugging symbols.  Output to `Log::Info` is only
+shown when the program is run with the `verbose` option (for a command-line
+binding, this is `--verbose` or `-v`).  `Log::Warn` is always shown, and
+`Log::Fatal` will throw a `std::runtime_error` exception, after a newline is
+sent to it. If mlpack was compiled with debugging symbols, `Log::Fatal` will
+also print a backtrace, if the necessary libraries are available.
+
+Here is a simple example binding, and its output.  Note that instead of
+`int main()`, we use `void BINDING_FUNCTION()`.  This is because the
+[automatic binding generator](bindings.md) will set up the environment and
+once that is done, it will call `BINDING_FUNCTION()`.
+
+```c++
+#include <mlpack/core.hpp>
+#include <mlpack/core/util/io.hpp>
+// This definition below means we will only compile for the command line.
+#define BINDING_TYPE BINDING_TYPE_CLI
+#include <mlpack/core/util/mlpack_main.hpp>
+
+using namespace mlpack;
+
+void BINDING_FUNCTION(util::Params& params, util::Timers& timers)
+{
+  Log::Debug << "Compiled with debugging symbols." << std::endl;
+
+  Log::Info << "Some test informational output." << std::endl;
+
+  Log::Warn << "A warning!" << std::endl;
+
+  Log::Fatal << "Program has crashed." << std::endl;
+
+  Log::Warn << "Made it!" << std::endl;
+}
+```
+
+Assuming mlpack is installed on the system and the code above is saved in
+`test.cpp`, this program can be compiled with the following command:
+
+```sh
+$ g++ -o test test.cpp -DDEBUG -g -rdynamic -lmlpack
+```
+
+Since we compiled with `-DDEBUG`, if we run the program as below, the following
+output is shown:
+
+```sh
+$ ./test --verbose
+[DEBUG] Compiled with debugging symbols.
+[INFO ] Some test informational output.
+[WARN ] A warning!
+[FATAL] [bt]: (1) /absolute/path/to/file/example.cpp:6: function()
+[FATAL] Program has crashed.
+terminate called after throwing an instance of 'std::runtime_error'
+  what():  fatal error; see Log::Fatal output
+Aborted
+```
+
+The flags `-g` and `-rdynamic` are only necessary for providing a backtrace.
+If those flags are not given during compilation, the following output would be
+shown:
+
+```sh
+$ ./test --verbose
+[DEBUG] Compiled with debugging symbols.
+[INFO ] Some test informational output.
+[WARN ] A warning!
+[FATAL] Cannot give backtrace because program was compiled without: -g -rdynamic
+[FATAL] For a backtrace, recompile with: -g -rdynamic.
+[FATAL] Program has crashed.
+terminate called after throwing an instance of 'std::runtime_error'
+  what():  fatal error; see Log::Fatal output
+Aborted
+```
+
+The last warning is not reached, because `Log::Fatal` terminates the program.
+
+Without debugging symbols (i.e. without `-g` and `-DDEBUG`) and without
+`--verbose`, the following is shown:
+
+```sh
+$ ./test
+[WARN ] A warning!
+[FATAL] Program has crashed.
+terminate called after throwing an instance of 'std::runtime_error'
+  what():  fatal error; see Log::Fatal output
+Aborted
+```
+
+These four outputs can be very useful for both providing informational output
+and debugging output for your mlpack program.
+
+## Simple parameter example
+
+Through the `mlpack::util::Params` object, parameters can be easily added to a
+binding with the `BINDING_NAME`, `BINDING_SHORT_DESC`, `BINDING_LONG_DESC`,
+`BINDING_EXAMPLE`, `BINDING_SEE_ALSO`, `PARAM_INT`, `PARAM_DOUBLE`,
+`PARAM_STRING`, and `PARAM_FLAG` macros.
+
+Here is a sample use of those macros, extracted from `methods/pca/pca_main.cpp`.
+(Some details have been omitted from the snippet below.)
+
+```c++
+#include <mlpack/core.hpp>
+#include <mlpack/core/util/io.hpp>
+#include <mlpack/core/util/mlpack_main.hpp>
+
+// Program Name.
+BINDING_NAME("Principal Components Analysis");
+
+// Short description.
+BINDING_SHORT_DESC(
+    "An implementation of several strategies for principal components analysis "
+    "(PCA), a common preprocessing step.  Given a dataset and a desired new "
+    "dimensionality, this can reduce the dimensionality of the data using the "
+    "linear transformation determined by PCA.");
+
+// Long description.
+BINDING_LONG_DESC(
+    "This program performs principal components analysis on the given dataset "
+    "using the exact, randomized, randomized block Krylov, or QUIC SVD method. "
+    "It will transform the data onto its principal components, optionally "
+    "performing dimensionality reduction by ignoring the principal components "
+    "with the smallest eigenvalues.");
+
+// See also...
+BINDING_SEE_ALSO("Principal component analysis on Wikipedia",
+    "https://en.wikipedia.org/wiki/Principal_component_analysis");
+BINDING_SEE_ALSO("PCA C++ class documentation",
+    "@src/mlpack/methods/pca/pca.hpp");
+
+// Parameters for program.
+PARAM_MATRIX_IN_REQ("input", "Input dataset to perform PCA on.", "i");
+PARAM_MATRIX_OUT("output", "Matrix to save modified dataset to.", "o");
+PARAM_INT_IN("new_dimensionality", "Desired dimensionality of output dataset.",
+    "d", 0);
+
+using namespace mlpack;
+
+void BINDING_FUNCTION(util::Params& params, util::Timers& timers)
+{
+  // Load input dataset.
+  arma::mat& dataset = params.Get<arma::mat>("input");
+
+  size_t newDimension = params.Get<int>("new_dimensionality");
+
+  ...
+
+  // Now save the results.
+  if (params.Has("output"))
+    params.Get<arma::mat>("output") = std::move(dataset);
+}
+```
+
+Documentation is automatically generated using those macros, and if compiled to
+a command-line program, when that program is run with `--help` the following is
+displayed:
+
+```
+$ mlpack_pca --help
+Principal Components Analysis
+
+  This program performs principal components analysis on the given dataset.  It
+  will transform the data onto its principal components, optionally performing
+  dimensionality reduction by ignoring the principal components with the
+  smallest eigenvalues.
+
+Required options:
+
+  --input_file [string]         Input dataset to perform PCA on.
+  --output_file [string]        Matrix to save modified dataset to.
+
+Options:
+
+  --help (-h)                   Default help info.
+  --info [string]               Get help on a specific module or option.
+                                Default value ''.
+  --new_dimensionality [int]    Desired dimensionality of output dataset.
+                                Default value 0.
+  --verbose (-v)                Display informational messages and the full list
+                                of parameters and timers at the end of
+                                execution.
+```
+
+The `mlpack::IO` source code can be consulted for further and complete
+documentation.  Also useful is to look at other example bindings, found in
+`src/mlpack/methods/`.
diff -pruN 3.4.2-7/doc/developer/kernels.md 4.0.1-1/doc/developer/kernels.md
--- 3.4.2-7/doc/developer/kernels.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/developer/kernels.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,152 @@
+# The KernelType policy in mlpack
+
+Kernel methods make up a large class of machine learning techniques.  Each of
+these methods is characterized by its dependence on a *kernel function*.  In
+rough terms, a kernel function is a general notion of similarity between two
+points, with its value large when objects are similar and its value small when
+objects are dissimilar (note that this is not the only interpretation of what a
+kernel is).
+
+A kernel (or 'Mercer kernel') `K(a, b)` takes two objects as input and returns
+some sort of similarity value.  The specific details and properties of kernels
+are outside the scope of this documentation; for a better introduction to
+kernels and kernel methods, there are numerous better resources available,
+including
+[Eric Kim's tutorial](http://www.eric-kim.net/eric-kim-net/posts/1/kernel_trick.html).
+
+mlpack implements a number of kernel methods and, accordingly, each of these
+methods allows arbitrary kernels to be used via the `KernelType` template
+parameter.  Like the [MetricType policy](metrics.md), the requirements are
+quite simple: a class implementing the `KernelType` policy must have
+
+ - an `Evaluate()` function
+ - a default constructor
+
+The signature of the `Evaluate()` function is straightforward:
+
+```c++
+template<typename VecTypeA, typename VecTypeB>
+double Evaluate(const VecTypeA& a, const VecTypeB& b);
+```
+
+The function takes two vector arguments, `a` and `b`, and returns a `double`
+that is the evaluation of the kernel between the two arguments.  So, for a
+particular kernel `K`, the `Evaluate()` function should return `K(a, b)`.
+
+The arguments `a` and `b`, of types `VecTypeA` and `VecTypeB`, respectively,
+will be an Armadillo-like vector type (usually `arma::vec`, `arma::sp_vec`, or
+similar).  In general it should be valid to assume that `VecTypeA` is a class
+with the same API as `arma::vec`.
+
+Note that for kernels that do not hold any state, the `Evaluate()` method can be
+marked as `static`.
+
+Overall, the `KernelType` template policy is quite simple (much like the
+[MetricType policy](metrics.md)).  Below is an example kernel class, which
+outputs `1` if the vectors are close and `0` otherwise.
+
+```c++
+class ExampleKernel
+{
+ public:
+  // Default constructor is required.
+  ExampleKernel() { }
+
+  // The example kernel holds no state, so we can mark Evaluate() as static.
+  template<typename VecTypeA, typename VecTypeB>
+  static double Evaluate(const VecTypeA& a, const VecTypeB& b)
+  {
+    // Get how far apart the vectors are (using the Euclidean distance).
+    const double distance = arma::norm(a - b);
+
+    if (distance < 0.05) // Less than 0.05 distance is "close".
+      return 1;
+    else
+      return 0;
+  }
+};
+```
+
+Then, this kernel may be easily used inside of mlpack algorithms.  For instance,
+the code below runs kernel PCA (`KernelPCA`) on a random dataset using the
+`ExampleKernel`.  The results are saved to a file called `results.csv`.  (Note
+that this is simply an example to demonstrate usage, and this example kernel
+isn't actually likely to be useful in practice.)
+
+```c++
+#include <mlpack.hpp>
+#include "example_kernel.hpp" // Contains the ExampleKernel class.
+
+using namespace mlpack;
+using namespace arma;
+
+int main()
+{
+  // Generate the random dataset; 10 dimensions, 5000 points.
+  mat dataset = randu<mat>(10, 5000);
+
+  // Instantiate the KernelPCA object with the ExampleKernel kernel type.
+  KernelPCA<ExampleKernel> kpca;
+
+  // The dataset will be transformed using kernel PCA with the example kernel to
+  // contain only 2 dimensions.
+  kpca.Apply(dataset, 2);
+
+  // Save the results to 'results.csv'.
+  data::Save(dataset, "results.csv");
+}
+```
+
+## The `KernelTraits` trait class
+
+Some algorithms that use kernels can specialize if the kernel fulfills some
+certain conditions.  An example of a condition might be that the kernel is
+shift-invariant or that the kernel is normalized.  In the case of fast
+max-kernel search (`FastMKS`), the computation can be accelerated if the
+kernel is normalized.  For this reason, the `KernelTraits`
+trait class exists.  This allows a kernel to specify via a `const static bool`
+when these types of conditions are satisfied.  *Note that a KernelTraits class
+is not required,* but may be helpful.
+
+The `KernelTraits` trait class is a template class that takes a `KernelType` as
+a parameter, and exposes `const static bool` values that depend on the kernel.
+Setting these values is achieved by specialization.  The code below provides an
+example, specializing `KernelTraits` for the `ExampleKernel` from earlier:
+
+```c++
+template<>
+class KernelTraits<ExampleKernel>
+{
+ public:
+  //! The example kernel is normalized (K(x, x) = 1 for all x).
+  const static bool IsNormalized = true;
+};
+```
+
+At this time, there is only one kernel trait that is used in mlpack code:
+
+ - `IsNormalized` (defaults to `false`): if `K(x, x) = 1` for all `x`,
+   then the kernel is normalized and this should be set to `true`.
+
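+As a sketch of how such a trait might be consumed by an algorithm (this
+particular helper function is illustrative, not part of mlpack):
+
+```c++
+template<typename KernelType>
+double SelfKernelValue(KernelType& kernel, const arma::vec& x)
+{
+  if (KernelTraits<KernelType>::IsNormalized)
+    return 1.0; // K(x, x) = 1 for normalized kernels; skip the evaluation.
+  else
+    return kernel.Evaluate(x, x);
+}
+```
+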
+## List of kernels and classes that use a `KernelType`
+
+mlpack comes with a number of pre-written kernels that satisfy the `KernelType`
+policy:
+
+ - `LinearKernel`
+ - `ExampleKernel` -- an example kernel with more documentation
+ - `GaussianKernel`
+ - `HyperbolicTangentKernel`
+ - `EpanechnikovKernel`
+ - `CosineDistance`
+ - `LaplacianKernel`
+ - `PolynomialKernel`
+ - `TriangularKernel`
+ - `SphericalKernel`
+ - `PSpectrumStringKernel` -- operates on strings, not vectors
+
+These kernels (or a custom kernel) may be used in a variety of mlpack methods:
+
+ - `KernelPCA` - kernel principal components analysis
+ - `FastMKS` - fast max-kernel search
+ - `NystroemMethod` - the Nystroem method for sampling
+ - `IPMetric` - a metric built on a kernel
diff -pruN 3.4.2-7/doc/developer/metrics.md 4.0.1-1/doc/developer/metrics.md
--- 3.4.2-7/doc/developer/metrics.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/developer/metrics.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,108 @@
+# The MetricType policy in mlpack
+
+Many machine learning methods operate with some sort of metric, and often, this
+metric can be any arbitrary metric.  For instance, consider the problem of
+nearest neighbor search; one can find the nearest neighbor of a point with
+respect to the standard Euclidean distance, or the Manhattan (city-block)
+distance.  The actual search techniques, though, remain the same.  And this is
+true of many machine learning methods: the specific metric that is used can be
+any valid metric.
+
+mlpack algorithms, when relevant, allow the use of an arbitrary metric via the
+use of the `MetricType` template parameter.  Any metric passed as a `MetricType`
+template parameter will need to have
+
+ - an `Evaluate()` function
+ - a default constructor.
+
+The signature of the `Evaluate()` function is straightforward:
+
+```c++
+template<typename VecTypeA, typename VecTypeB>
+double Evaluate(const VecTypeA& a, const VecTypeB& b);
+```
+
+The function takes two vector arguments, `a` and `b`, and returns a `double`
+that is the evaluation of the metric between the two arguments.  So, for a
+particular metric `d`, the `Evaluate()` function should return `d(a, b)`.
+
+The arguments `a` and `b`, of types `VecTypeA` and `VecTypeB`, respectively,
+will be an Armadillo-like vector type (usually `arma::vec`, `arma::sp_vec`, or
+similar).  In general it should be valid to assume that `VecTypeA` is a class
+with the same API as `arma::vec`.
+
+Note that for metrics that do not hold any state, the `Evaluate()` method can
+be marked as `static`.
+
+Overall, the `MetricType` template policy is quite simple (much like the
+[KernelType policy](kernels.md)).  Below is an example metric class, which
+implements the L2 distance:
+
+```c++
+class ExampleMetric
+{
+ public:
+  // Default constructor is required.
+  ExampleMetric() { }
+
+  // The example metric holds no state, so we can mark Evaluate() as static.
+  template<typename VecTypeA, typename VecTypeB>
+  static double Evaluate(const VecTypeA& a, const VecTypeB& b)
+  {
+    // Return the L2 norm of the difference between the points, which is the
+    // same as the L2 distance.
+    return arma::norm(a - b);
+  }
+};
+```
+
+Then, this metric can easily be used inside of other mlpack algorithms.  For
+example, the code below runs range search on a random dataset with the
+`ExampleMetric`, by instantiating a `RangeSearch` object that uses the
+`ExampleMetric`.  Then, the number of results is printed.  The `RangeSearch`
+class takes three template parameters: `MetricType`, `MatType`, and `TreeType`.
+(All three have defaults, so we will just leave `MatType` and `TreeType` to
+their defaults.)
+
+```c++
+#include <mlpack.hpp>
+#include "example_metric.hpp" // A file that contains ExampleKernel.
+
+using namespace mlpack;
+using namespace std;
+
+int main()
+{
+  // Create a random dataset with 10 dimensions and 5000 points.
+  arma::mat data = arma::randu<arma::mat>(10, 5000);
+
+  // Instantiate the RangeSearch object with the ExampleMetric.
+  RangeSearch<ExampleMetric> rs(data);
+
+  // These vectors will store the results.
+  vector<vector<size_t>> neighbors;
+  vector<vector<double>> distances;
+
+  // Create a random 10-dimensional query point.
+  arma::vec query = arma::randu<arma::vec>(10);
+
+  // Find those points with distance (according to ExampleMetric) between 1 and
+  // 2 from the query point.
+  rs.Search(query, Range(1.0, 2.0), neighbors, distances);
+
+  // Now, print the number of points inside the desired range.  We know that
+  // neighbors and distances will have length 1, since there was only one query
+  // point.
+  cout << neighbors[0].size() << " points within the range [1.0, 2.0] of the "
+      << "query point!" << endl;
+}
+```
+
+mlpack comes with a number of pre-written metrics that satisfy the `MetricType`
+policy:
+
+ - `ManhattanDistance`
+ - `EuclideanDistance`
+ - `ChebyshevDistance`
+ - `MahalanobisDistance`
+ - `LMetric` (for arbitrary L-metrics)
+ - `IPMetric` (requires a [KernelType](kernels.md) parameter)
diff -pruN 3.4.2-7/doc/developer/timer.md 4.0.1-1/doc/developer/timer.md
--- 3.4.2-7/doc/developer/timer.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/developer/timer.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,68 @@
+# mlpack Timers
+
+mlpack provides a simple timer interface for the timing of machine learning
+methods.  The results of any timers used during the program are displayed at
+output by any command-line binding, when `--verbose` is given:
+
+```sh
+$ mlpack_knn -r dataset.csv -n neighbors_out.csv -d distances_out.csv -k 5 -v
+<...>
+[INFO ] Program timers:
+[INFO ]   computing_neighbors: 0.010650s
+[INFO ]   loading_data: 0.002567s
+[INFO ]   saving_data: 0.001115s
+[INFO ]   total_time: 0.149816s
+[INFO ]   tree_building: 0.000534s
+```
+
+## Timer API
+
+In C++, the `mlpack::Timers` class can be used to add timers to a program.  The
+`mlpack::Timers` class provides three simple methods:
+
+```c++
+void Timers::Start(const char* name);
+void Timers::Stop(const char* name);
+timeval Timers::Get(const char* name);
+```
+
+Every binding is called with an `mlpack::Timers&`, which can be used in the body
+of that binding.  For the sake of this discussion, let us call that object
+`timers`.
+
+Each timer is given a name, and is referenced by that name.  You can call
+`timers.Start()` and `timers.Stop()` multiple times for a particular timer name,
+and the result will be the sum of the runs of the timer.  Note that
+`timers.Stop()` must be called before `timers.Start()` is called again,
+otherwise a `std::runtime_error` exception will be thrown.
+
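+As a minimal sketch of that accumulation behavior (the helper functions here
+are hypothetical):
+
+```c++
+timers.Start("io");
+LoadData();  // Hypothetical helper.
+timers.Stop("io");
+
+// ... other, untimed work ...
+
+timers.Start("io");
+SaveData();  // Hypothetical helper.
+timers.Stop("io");
+// The reported "io" time is the sum of both intervals.
+```
+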
+A `"total_time"` timer is run automatically for each mlpack binding.
+
+## Timer Example
+
+Below is a very simple example of timer usage in code.
+
+```c++
+#include <mlpack/core.hpp>
+#include <mlpack/core/util/io.hpp>
+#define BINDING_TYPE BINDING_TYPE_CLI
+#include <mlpack/core/util/mlpack_main.hpp>
+
+using namespace mlpack;
+
+void BINDING_FUNCTION(util::Params& params, util::Timers& timers)
+{
+  // Start a timer.
+  timers.Start("some_timer");
+
+  // Do some things.
+  DoSomeStuff();
+
+  // Stop the timer.
+  timers.Stop("some_timer");
+}
+```
+
+If the `verbose` flag was given to this binding, then a command-line binding
+would print the time that `"some_timer"` ran for at the end of the program's
+output.
diff -pruN 3.4.2-7/doc/developer/trees.md 4.0.1-1/doc/developer/trees.md
--- 3.4.2-7/doc/developer/trees.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/developer/trees.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,851 @@
+# The TreeType policy in mlpack
+
+Trees are an important data structure in mlpack and are used in a number of the
+machine learning algorithms that mlpack implements.  Often, the use of trees can
+allow significant acceleration of an algorithm; this is generally done by
+pruning away large parts of the tree during computation.
+
+Most mlpack algorithms that use trees are not tied to a specific tree but
+instead allow the user to choose a tree via the `TreeType` template parameter.
+Any tree passed as a `TreeType` template parameter will need to implement a
+certain set of functions.  In addition, a tree may optionally specify some
+traits about itself with the `TreeTraits` trait class.
+
+This document aims to clarify the abstractions underlying mlpack trees, list and
+describe the required functionality of the `TreeType` policy, and point users
+towards existing types of trees.
+
+Although this document is long, there may still be errors and unclear areas.  If
+you are having trouble understanding anything, please get in touch on GitHub or
+on the mailing list and someone will help you (and possibly update the
+documentation afterwards).
+
+## What is a tree?
+
+In mlpack, we assume that we have some sort of data matrix, which might be
+sparse or dense (that is, it could be of type `arma::mat` or `arma::sp_mat`,
+or any variant that implements the Armadillo API).  This data matrix corresponds
+to a collection of points in some space (usually a Euclidean space).  A tree is
+a way of organizing this data matrix in a hierarchical manner---so, points that
+are nearby should lie in similar nodes.
+
+We can rigorously define what a tree is, using the definition of *space tree*
+introduced in the following paper:
+
+```
+R.R. Curtin, W.B. March, P. Ram, D.V. Anderson, A.G. Gray, and C.L. Isbell Jr.,
+"Tree-independent dual-tree algorithms," in Proceedings of the 30th
+International Conference on Machine Learning (ICML '13), pp. 1435--1443, 2013.
+```
+
+The definition is:
+
+A *space tree* on a dataset `S` in `R^(N x d)` is an undirected, connected,
+acyclic, rooted simple graph with the following properties:
+
+ - Each node (or vertex) holds a number of points (possibly zero) and is
+connected to one parent node and a number of child nodes (possibly zero).
+
+ - There is one node in every space tree with no parent; this is the root node
+of the tree.
+
+ - Each point in `S` is contained in at least one node.
+
+ - Each node corresponds to some subset of `R^d` that contains each point in the
+   node and also the subsets that correspond to each child of the node.
+
+This is really a quite straightforward definition: a tree is hierarchical, and
+each node corresponds to some region of the input space.  Each node may have
+some number of children, and may hold some number of points.  However, there is
+an important terminology distinction to make: the term *points held by a node*
+has a different meaning than the term *descendant points held by a node*.  The
+points held in a node are just that---points held only in the node.  The
+descendant points of a node are the combination of the points held in a node
+with the points held in the node's children and the points held in the node's
+children's children (and so forth).  For the purposes of clarity in all
+discussions about trees, care is taken to differentiate the terms "descendant
+point" and "point".
+
+Now, it's also important to note that a node does not *need* to hold any
+children, and that a node *can* hold the same points as its children (or its
+parent).  Some types of trees do this.  For instance, each node in the cover
+tree holds only one point, and may have a child that holds the same point.  As
+another example, the `kd`-tree holds its points only in the leaves (at the
+bottom of the tree).  More information on space trees can be found in either the
+"Tree-independent dual-tree algorithms" paper or any of the related literature.
+
+So there is a huge amount of possible variety in the types of trees that can
+fall into the class of *space trees*.  Therefore, it's important to treat them
+abstractly, and the `TreeType` policy allows us to do just that.  All we need
+to remember is that a node in a tree can be represented as the combination of
+some points held in the node, some child nodes, and some geometric structure
+that represents the space that all of the descendant points fall into (this is a
+restatement of the fourth part of the definition).
+
+## Template parameters required by the TreeType policy
+
+Nearly everything in mlpack is decomposed into a series of configurable
+template parameters, and trees are no exception.  In order to ease usage of
+high-level mlpack algorithms, each `TreeType` itself must be a template class
+taking three parameters:
+
+ - `MetricType` -- the underlying metric that the tree will be built on (see
+[the MetricType policy documentation](metrictype.md))
+ - `StatisticType` -- holds any auxiliary information that individual
+algorithms may need
+ - `MatType` -- the type of the matrix used to represent the data
+
+The reason that these three template parameters are necessary is so that each
+`TreeType` can be used as a template template parameter, which can radically
+simplify the required syntax for instantiating mlpack algorithms.  By using
+template template parameters, a user needs only to write
+
+```c++
+// The RangeSearch class takes a MetricType and a TreeType template parameter.
+
+// This code instantiates RangeSearch with the ManhattanDistance and a
+// QuadTree.  Note that the QuadTree itself is a template, and takes a
+// MetricType, StatisticType, and MatType, just like the policy requires.
+
+// This example ignores the constructor parameters, for the sake of simplicity.
+RangeSearch<ManhattanDistance, QuadTree> rs(...);
+```
+
+as opposed to the far more complicated alternative, where the user must specify
+the values of each template parameter of the tree type:
+
+```c++
+// This is a much worse alternative, where the user must specify the template
+// arguments of their tree.
+RangeSearch<ManhattanDistance,
+            QuadTree<ManhattanDistance, EmptyStatistic, arma::mat>> rs(...);
+```
+
+Unfortunately, the price to pay for this user convenience is that *every*
+`TreeType` must have three template parameters, and they must be in exactly
+that order.  Fortunately, there is an additional benefit: we are guaranteed that
+the tree is built using the same metric as the method (that is, a user can't
+specify different metric types to the algorithm and to the tree, which they can
+without template template parameters).
+
+There are two important notes about this:
+
+ - Not every possible combination of `MetricType`, `StatisticType`, and/or
+   `MatType` will necessarily be valid or work correctly for each type of
+   tree.  For
+   instance, the `QuadTree` is limited to Euclidean metrics and will not work
+   otherwise.  Either compile-time static checks or detailed documentation can
+   help keep users from using invalid combinations of template arguments.
+
+ - Some types of trees have more template parameters than just these three.  One
+   example is the generalized binary space tree, where the bounding shape of
+   each node is easily made into a fourth template parameter (the
+   `BinarySpaceTree` class calls this the `BoundType` parameter), and the
+   procedure used to split a node is easily made into a fifth template parameter
+   (the `BinarySpaceTree` class calls this the `SplitType` parameter).  However,
+   the syntax of template template parameters *requires* that the class only has
+   the correct number of template parameters---no more, no less.  Fortunately,
+   C++11 allows template typedefs, which can be used to provide partial
+   specialization of template classes:
+
+```c++
+// This is the definition of the BinarySpaceTree class, which has five template
+// parameters.
+template<typename MetricType,
+         typename StatisticType,
+         typename MatType,
+         typename BoundType,
+         typename SplitType>
+class BinarySpaceTree;
+
+// The 'using' keyword gives us a template typedef, so we can define the
+// MeanSplitKDTree template class, which has three parameters and is a valid
+// TreeType policy class.
+template<typename MetricType, typename StatisticType, typename MatType>
+using MeanSplitKDTree = BinarySpaceTree<MetricType,
+                                        StatisticType,
+                                        MatType,
+                                        HRectBound<MetricType>,
+                                        MeanSplit<HRectBound<MetricType>,
+                                                  MetricType>>;
+```
+
+Now, the `MeanSplitKDTree` class has only three template parameters and can be
+used as a `TreeType` policy class in various mlpack algorithms.  Many types of
+trees in mlpack have more than three template parameters and rely on template
+typedefs to provide simplified `TreeType` interfaces.
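+
+For instance, a sketch in the style of the earlier example (constructor
+parameters elided):
+
+```c++
+// MeanSplitKDTree satisfies the TreeType policy, so it can be used anywhere
+// a TreeType template template parameter is expected.
+RangeSearch<EuclideanDistance, MeanSplitKDTree> rs(...);
+```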
+
+## The TreeType API
+
+As a result of the definition of *space tree* in the previous section, a
+simplified API presents itself quite easily.  However, more complex
+functionality is often necessary in mlpack, so this leads to more functions
+being necessary for a class to satisfy the `TreeType` policy.  Combining this
+with the template parameters required for trees given in the previous section
+gives us the complete API required for a class implementing the `TreeType`
+policy.  Below is the minimal set of functions required, with brief
+documentation for each function.  (More extensive documentation and explanation
+is given afterwards.)
+
+```c++
+// The three template parameters will be supplied by the user, and are detailed
+// in the previous section.
+template<typename MetricType,
+         typename StatisticType,
+         typename MatType>
+class ExampleTree
+{
+ public:
+  //////////////////////
+  //// Constructors ////
+  //////////////////////
+
+  // This batch constructor does not modify the dataset, and builds the entire
+  // tree using a default-constructed MetricType.
+  ExampleTree(const MatType& data);
+
+  // This batch constructor does not modify the dataset, and builds the entire
+  // tree using the given MetricType.
+  ExampleTree(const MatType& data, MetricType& metric);
+
+  // Initialize the tree from a given cereal archive.  SFINAE (the
+  // second argument) is necessary to ensure that the archive is loading, not
+  // saving.
+  template<typename Archive>
+  ExampleTree(
+      Archive& ar,
+      const typename std::enable_if<Archive::is_loading::value>::type* = 0);
+
+  // Release any resources held by the tree.
+  ~ExampleTree();
+
+  // ///////////////////////// //
+  // // Basic functionality // //
+  // ///////////////////////// //
+
+  // Get the dataset that the tree is built on.
+  const MatType& Dataset();
+
+  // Get the metric that the tree is built with.
+  MetricType& Metric();
+
+  // Get/modify the StatisticType for this node.
+  StatisticType& Stat();
+
+  // Return the parent of the node, or NULL if this is the root.
+  ExampleTree* Parent();
+
+  // Return the number of children held by the node.
+  size_t NumChildren();
+  // Return the i'th child held by the node.
+  ExampleTree& Child(const size_t i);
+
+  // Return the number of points held in the node.
+  size_t NumPoints();
+  // Return the index of the i'th point held in the node.
+  size_t Point(const size_t i);
+
+  // Return the number of descendant nodes of this node.
+  size_t NumDescendantNodes();
+  // Return the i'th descendant node of this node.
+  ExampleTree& DescendantNode(const size_t i);
+
+  // Return the number of descendant points of this node.
+  size_t NumDescendants();
+  // Return the index of the i'th descendant point of this node.
+  size_t Descendant(const size_t i);
+
+  // Store the center of the bounding region of the node in the given vector.
+  void Center(arma::vec& center);
+
+  // ///////////////////////////////////////////////// //
+  // // More complex distance-related functionality // //
+  // ///////////////////////////////////////////////// //
+
+  // Return the distance between the center of this node and the center of
+  // its parent.
+  double ParentDistance();
+
+  // Return an upper bound on the furthest possible distance between the
+  // center of the node and any point held in the node.
+  double FurthestPointDistance();
+
+  // Return an upper bound on the furthest possible distance between the
+  // center of the node and any descendant point of the node.
+  double FurthestDescendantDistance();
+
+  // Return a lower bound on the minimum distance between the center and any
+  // edge of the node's bounding shape.
+  double MinimumBoundDistance();
+
+  // Return a lower bound on the minimum distance between the given point and
+  // the node.
+  template<typename VecType>
+  double MinDistance(VecType& point);
+
+  // Return a lower bound on the minimum distance between the given node and
+  // this node.
+  double MinDistance(ExampleTree& otherNode);
+
+  // Return an upper bound on the maximum distance between the given point and
+  // the node.
+  template<typename VecType>
+  double MaxDistance(VecType& point);
+
+  // Return an upper bound on the maximum distance between the given node and
+  // this node.
+  double MaxDistance(ExampleTree& otherNode);
+
+  // Return the combined results of MinDistance() and MaxDistance().
+  template<typename VecType>
+  Range RangeDistance(VecType& point);
+
+  // Return the combined results of MinDistance() and MaxDistance().
+  Range RangeDistance(ExampleTree& otherNode);
+
+  // //////////////////////////////////// //
+  // // Serialization (loading/saving) // //
+  // //////////////////////////////////// //
+
+  // Return a string representation of the tree.
+  std::string ToString() const;
+
+  // Serialize the tree (load from the given archive / save to the given
+  // archive, depending on its type).
+  template<typename Archive>
+  void serialize(Archive& ar, const uint32_t version);
+
+ protected:
+  // A default constructor; only meant to be used by cereal.  This
+  // must be protected so that cereal will work; it does not need
+  // to return a valid tree.
+  ExampleTree();
+
+  // Friend access must be given for the default constructor.
+  friend class cereal::access;
+};
+```
+
+Although this is significantly more complex than the four-item definition of
+*space tree* might suggest, it turns out that many of these methods are not
+difficult to implement for most reasonable tree types.  It is also important to
+realize that this is a *minimum* API; you may implement more complex tree types
+at your leisure (and you may include more template parameters too, though you
+will have to use template typedefs to provide versions with three parameters;
+see the previous section).
+
+Before diving into the detailed documentation for each function, let us consider
+a few important points about the implications of this API:
+
+ - ***Trees are not default-constructible*** and should not (in general) provide
+   a default constructor.  This helps prevent invalid trees.  In general, any
+   instantiated mlpack object should be valid and ready to use---and a tree
+   built on no points is not valid or ready to use.
+
+ - ***Trees only need to provide batch constructors.***  Although many tree
+   types do have algorithms for incremental insertions, in mlpack this is not
+   required because the tree-based algorithms that mlpack implements generally
+   assume fully-built, non-modifiable trees.  For this purpose, batch
+   construction is perfectly sufficient.  (It's also worth pointing out that for
+   some types of trees, like kd-trees, the cost of a handful of insertions often
+   outweighs the cost of completely rebuilding the tree.)
+
+ - **Trees must provide a number of distance bounding functions.**  The utility
+   of trees generally stems from the ability to place quick bounds on
+   distance-related quantities.  For instance, if all the descendant points of a
+   node are bounded by a ball of radius `r` and the center of the node is a
+   point `c`, then the minimum distance between some point `p` and any
+   descendant point of the node is equal to the distance between `p` and `c`
+   minus the radius `r`: `d(p, c) - r`.  This is a fast calculation, and
+   (usually) provides a decent bound on the minimum distance between `p` and any
+   descendant point of the node.
+
+ - ***Trees need to be able to be serialized.***  mlpack uses the cereal library
+   for saving and loading objects.  Trees---which can be a part of machine
+   learning models---therefore must have the ability to be saved and loaded.
+   Making this all work requires a protected constructor (part of the API) and
+   generally makes it impossible to hold references instead of pointers
+   internally, because if a tree is loaded from a file then it must own the
+   dataset it is built on and the metric it uses (this also means that a
+   destructor must exist for freeing these resources).
+
+Now, we can consider each part of the API more rigorously.
+
+## Rigorous API documentation
+
+This section is divided into five parts, each detailing one part of the API.
+
+### Template parameters
+
+An earlier section discussed the three different template parameters that are
+required by the `TreeType` policy.
+
+The [MetricType policy](metrictype.md) provides one method that will be useful
+for tree building and other operations:
+
+```c++
+// This function is required by the MetricType policy.
+// Evaluate the metric between two points (which may be of different types).
+template<typename VecTypeA, typename VecTypeB>
+double Evaluate(const VecTypeA& a, const VecTypeB& b);
+```
+
+Note that this method is not necessarily static, so a `MetricType` object should
+be held internally and its `Evaluate()` method should be called whenever the
+distance between two points is required.  *It is generally a bad idea to
+hardcode any distance calculation in your tree.*  This will make the tree unable
+to generalize to arbitrary metrics.  If your tree must depend on certain
+assumptions holding about the metric (i.e. the metric is a Euclidean metric),
+then make that clear in the documentation of the tree, so users do not try to
+use the tree with an inappropriate metric.
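+
+As a brief sketch (the members `metric` and `dataset` are hypothetical names
+for what a tree class might hold internally):
+
+```c++
+// Distance between the i'th and j'th dataset points, using the held metric.
+const double d = metric.Evaluate(dataset.col(i), dataset.col(j));
+```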
+
+The second template parameter, `StatisticType`, is for auxiliary information
+that is required by certain algorithms.  For instance, consider an algorithm
+which repeatedly uses the variance of the descendant points of a node.  It might
+be tempting to add a `Variance()` method to the required `TreeType` API, but
+this quickly leads to code bloat (after all, the API already has quite enough
+functions as it is).  Instead, it is better to create a `StatisticType` class
+which provides the `Variance()` method, and then call `Stat().Variance()` when
+the variance is required.  This also holds true for cached data members.
+
+Each node should have its own instance of a `StatisticType` class.  The
+`StatisticType` must provide the following constructors:
+
+```c++
+// Default constructor required by the StatisticType policy.
+StatisticType();
+
+// This constructor is required by the StatisticType policy.
+template<typename TreeType>
+StatisticType(TreeType& node);
+```
+
+This constructor should be called with `(*this)` after the node is constructed
+(usually, this ends up being the last line in the constructor of a node).
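+
+A hedged sketch of what this typically looks like (the member name `stat` is
+hypothetical):
+
+```c++
+template<typename MetricType, typename StatisticType, typename MatType>
+ExampleTree<MetricType, StatisticType, MatType>::ExampleTree(
+    const MatType& data)
+{
+  // ... build the node: set up children, points, and the bounding shape ...
+
+  // The node is now fully constructed, so the statistic can be initialized
+  // from it as the last step.
+  stat = StatisticType(*this);
+}
+```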
+
+The last template parameter is the `MatType` parameter.  This is generally
+`arma::mat` or `arma::sp_mat`, but could be any Armadillo type, including
+matrices that hold data points of different precisions (such as `float` or even
+`int`).  It generally suffices to write code for `MatType` assuming that
+`arma::mat` will be used, since the vast majority of the time this will be
+what is used.
+
+### Constructors and destructors
+
+The `TreeType` API requires at least three constructors.  Technically, it does
+not *require* a destructor, but your tree class will almost certainly be doing
+some memory management internally, and in that case it should have one.
+
+The first two constructors are variations of the same idea:
+
+```c++
+// This batch constructor does not modify the dataset, and builds the entire
+// tree using a default-constructed MetricType.
+ExampleTree(const MatType& data);
+
+// This batch constructor does not modify the dataset, and builds the entire
+// tree using the given MetricType.
+ExampleTree(const MatType& data, MetricType& metric);
+```
+
+All that is required here is that a constructor is available that takes a
+dataset and optionally an instantiated metric.  If no metric is provided, then
+it should be assumed that the `MetricType` class has a default constructor and
+a default-constructed metric should be used.  The constructor *must* return a
+valid, fully-constructed, ready-to-use tree that satisfies the definition
+of *space tree* that was given earlier in the document.
+
+The third constructor requires the tree to be initializable from a `cereal`
+archive:
+
+```c++
+// Initialize the tree from a given cereal archive.  SFINAE (the
+// second argument) is necessary to ensure that the archive is loading, not
+// saving.
+template<typename Archive>
+ExampleTree(
+    Archive& ar,
+    const typename std::enable_if<Archive::is_loading::value>::type* = 0);
+```
+
+This has implications on how the tree must be stored.  In this case, the dataset
+is *not yet loaded* and therefore the tree ***may be required to have
+ownership of the data matrix***.  This means that realistically the most
+reasonable way to represent the data matrix internally in a tree class is not
+with a reference but instead with a pointer.  If this is true, then a destructor
+will be required:
+
+```c++
+// Release any resources held by the tree.
+~ExampleTree();
+```
+
+and, if the data matrix is represented internally with a pointer, this
+destructor will need to release the memory for the data matrix (in the case that
+the tree was created via `cereal`).
+
+Note that these constructors are not necessarily the only constructors that a
+`TreeType` implementation can provide.  One important example of when more
+constructors are useful is when the tree rearranges points internally; this
+might be desired for the sake of speed or memory optimization.  But to do this
+with the required constructors would necessarily incur a copy of the data
+matrix, because the user will pass a `const MatType&`.  One alternate solution
+is to provide a constructor which takes an rvalue reference to a `MatType`:
+
+```c++
+ExampleTree(MatType&& data);
+```
+
+(and another overload that takes an instantiated metric), and then the user can
+use `std::move()` to build the tree without copying the data matrix, although
+the data matrix will be modified:
+
+```c++
+ExampleTree exTree(std::move(dataset));
+```
+
+It is, of course, possible to add even more constructors if desired.
+
+### Basic tree functionality
+
+The basic functionality of a class implementing the `TreeType` API is quite
+straightforward and intuitive.
+
+```c++
+// Get the dataset that the tree is built on.
+const MatType& Dataset();
+```
+
+This should return a `const` reference to the dataset the tree is built on.  The
+fact that this function is required essentially means that each node in the tree
+must store a pointer to the dataset (this is not the only option, but it is the
+most obvious option).
+
+```c++
+// Get the metric that the tree is built with.
+MetricType& Metric();
+```
+
+Each node must also store an instantiated metric or a pointer to one (note that
+this is required even for metrics that have no state and have a `static`
+`Evaluate()` function).
+
+```c++
+// Get/modify the StatisticType for this node.
+StatisticType& Stat();
+```
+
+As discussed earlier, each node must hold a `StatisticType`; this is accessible
+through the `Stat()` function.
+
+```c++
+// Return the parent of the node, or NULL if this is the root.
+ExampleTree* Parent();
+
+// Return the number of children held by the node.
+size_t NumChildren();
+// Return the i'th child held by the node.
+ExampleTree& Child(const size_t i);
+
+// Return the number of points held in the node.
+size_t NumPoints();
+// Return the index of the i'th point held in the node.
+size_t Point(const size_t i);
+
+// Return the number of descendant nodes of this node.
+size_t NumDescendantNodes();
+// Return the i'th descendant node of this node.
+ExampleTree& DescendantNode(const size_t i);
+
+// Return the number of descendant points of this node.
+size_t NumDescendants();
+// Return the index of the i'th descendant point of this node.
+size_t Descendant(const size_t i);
+```
+
+These functions are all fairly self-explanatory.  Most algorithms will use the
+`Parent()`, `Child()`, `NumChildren()`, `Point()`, and `NumPoints()`
+functions, so care should be taken when implementing those functions to ensure
+they will be efficient.  Note that `Point()` and `Descendant()` should return
+indices of points, so the actual points can be accessed by calling
+`Dataset().col(Point(i))` for some index `i` (or something similar).
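+
+As a quick sketch, the points held directly in a built tree node `node` can be
+iterated like this:
+
+```c++
+// Print each point (as a column vector) held directly in 'node'.
+for (size_t i = 0; i < node.NumPoints(); ++i)
+  node.Dataset().col(node.Point(i)).print();
+```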
+
+An important note about the `Descendant()` function is that each descendant
+point should be unique.  So if a node holds the point with index 6 and it has
+one child that holds the points with indices 6 and 7, then `NumDescendants()`
+should return 2, not 3.  The ordering in which the descendants are returned can
+be arbitrary; so, `Descendant(0)` can return 6 *or* 7, and `Descendant(1)`
+should return the other index.
+
+```c++
+// Store the center of the bounding region of the node in the given vector.
+void Center(arma::vec& center);
+```
+
+The last function, `Center()`, should calculate the center of the bounding shape
+and store it in the given vector.  So, for instance, if the tree is a ball tree,
+then the center is simply the center of the ball.  Algorithm writers would be
+wise to try to avoid the use of `Center()` if possible, since it will
+necessarily cost a copy of a vector.
+
+### Complex tree functionality and bounds
+
+A node in a tree should also be able to calculate various distance-related
+bounds; these are particularly useful in tree-based algorithms.  Note that none
+of these bounds needs to be maximally tight; generally it is more important
+that each bound can be easily calculated.
+
+Details on each bounding function that the `TreeType` API requires are given
+below.
+
+```c++
+// Return the distance between the center of this node and the center of
+// its parent.
+double ParentDistance();
+```
+
+Remember that each node corresponds to some region in the space that the dataset
+lies in.  For most tree types, this region is something geometrically
+simple: a ball, a cone, a hyperrectangle, a slice, or something similar.  The
+`ParentDistance()` function should return the distance between the center of
+this node's region and the center of the parent node's region.
+
+In practice this bound is often used in dual-tree (or single-tree) algorithms to
+place an easy `MinDistance()` (or `MaxDistance()`) bound for a child node; the
+parent's `MinDistance()` (or `MaxDistance()`) function is called and then
+adjusted with `ParentDistance()` to provide a possibly loose but efficient bound
+on what the result of `MinDistance()` (or `MaxDistance()`) would be with the
+child.
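+
+For intuition, here is a hedged sketch of that adjustment for a single query
+point, assuming its distance to the parent's center has already been computed
+(the function and its names are illustrative):
+
+```c++
+// Bound MinDistance() for a child using only the parent's already-computed
+// center distance---no extra Evaluate() call is needed.
+template<typename TreeType>
+double ChildMinDistanceBound(const double parentCenterDist, TreeType& child)
+{
+  // Triangle inequality: d(q, child's center) >= parentCenterDist -
+  // child.ParentDistance().  Subtracting the child's radius bound then gives
+  // a (possibly loose) lower bound on d(q, any descendant of child).
+  return std::max(parentCenterDist - child.ParentDistance() -
+      child.FurthestDescendantDistance(), 0.0);
+}
+```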
+
+```c++
+// Return an upper bound on the furthest possible distance between the
+// center of the node and any point held in the node.
+double FurthestPointDistance();
+
+// Return an upper bound on the furthest possible distance between the
+// center of the node and any descendant point of the node.
+double FurthestDescendantDistance();
+```
+
+It is often very useful to be able to bound the radius of a node, which is
+effectively what `FurthestDescendantDistance()` does.  Often it is easiest to
+simply calculate and cache the furthest descendant distance at tree construction
+time.  Some trees, such as the cover tree, are able to give guarantees that the
+points held in the node will necessarily be closer than the descendant points;
+therefore, the `FurthestPointDistance()` function is also useful.
+
+It is permissible to simply have `FurthestPointDistance()` return the result of
+`FurthestDescendantDistance()`, and that will still be a valid bound, but
+depending on the type of tree it may be possible to have
+`FurthestPointDistance()` return a tighter bound.
+
+```c++
+// Return a lower bound on the minimum distance between the center and any
+// edge of the node's bounding shape.
+double MinimumBoundDistance();
+```
+
+This is, admittedly, a somewhat complex and unusual quantity.  It is one of the
+less important bounding functions, so it is valid to simply return 0 (at the
+cost of a looser bound).
+
+The bound is a bound on the minimum distance between the center of the node and
+any edge of the shape that bounds all of the descendants of the node.  So, if
+the bounding shape is a ball (as in a ball tree or a cover tree), then
+`MinimumBoundDistance()` should just return the radius of the ball.  If the
+bounding shape is a hypercube (as in a generalized octree), then
+`MinimumBoundDistance()` should return the side length divided by two.  If the
+bounding shape is a hyperrectangle (as in a kd-tree or a spill tree), then
+`MinimumBoundDistance()` should return half the side length of the
+hyperrectangle's smallest side.
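+
+A hedged sketch for the hyperrectangle case (the member `bound` and its
+interface are illustrative, not part of the required API):
+
+```c++
+// Return half the length of the hyperrectangle's smallest side.
+double MinimumBoundDistance()
+{
+  double minSide = DBL_MAX;
+  for (size_t d = 0; d < bound.Dim(); ++d)
+    minSide = std::min(minSide, bound[d].Hi() - bound[d].Lo());
+  return minSide / 2.0;
+}
+```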
+
+```c++
+// Return a lower bound on the minimum distance between the given point and
+// the node.
+template<typename VecType>
+double MinDistance(VecType& point);
+
+// Return a lower bound on the minimum distance between the given node and
+// this node.
+double MinDistance(ExampleTree& otherNode);
+
+// Return an upper bound on the maximum distance between the given point and
+// the node.
+template<typename VecType>
+double MaxDistance(VecType& point);
+
+// Return an upper bound on the maximum distance between the given node and
+// this node.
+double MaxDistance(ExampleTree& otherNode);
+
+// Return the combined results of MinDistance() and MaxDistance().
+template<typename VecType>
+Range RangeDistance(VecType& point);
+
+// Return the combined results of MinDistance() and MaxDistance().
+Range RangeDistance(ExampleTree& otherNode);
+```
+
+These six functions are almost without a doubt the most important functionality
+of a tree.  Therefore, it is preferable that these methods be implemented as
+efficiently as possible, as they may potentially be called many millions of
+times in a tree-based algorithm.  It is also preferable that these bounds be as
+tight as possible.  In tree-based algorithms, these are used for pruning away
+work, and tighter bounds mean that more pruning is possible.
+
+Of these six functions, there are really only two bounds that are desired here:
+the *minimum distance* between a node and an object, and the *maximum distance*
+between a node and an object.  The object may be either a vector (usually
+`arma::vec`) or another tree node.
+
+Consider the first case, where the object is a vector.  The result of
+`MinDistance()` needs to be less than or equal to the true minimum distance,
+which could be calculated as below:
+
+```c++
+// We assume that we have a vector 'vec', and a tree node 'node'.
+double trueMinDist = DBL_MAX;
+for (size_t i = 0; i < node.NumDescendants(); ++i)
+{
+  const double dist = node.Metric().Evaluate(vec,
+      node.Dataset().col(node.Descendant(i)));
+  if (dist < trueMinDist)
+    trueMinDist = dist;
+}
+// At the end of the loop, trueMinDist will hold the true minimum distance
+// between 'vec' and any descendant point of 'node'.
+```
+
+Often the bounding shape of a node will allow a quick calculation that will make
+a reasonable bound.  For instance, if the node's bounding shape is a ball with
+radius `r` and center `ctr`, the calculation is simply
+`(node.Metric().Evaluate(vec, ctr) - r)`.  Usually a good `MinDistance()` or
+`MaxDistance()` function will make only one call to the `Evaluate()` function of
+the metric.
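+
+A hedged sketch for a ball bound with a cached center `ctr` and radius `r`
+(both names are illustrative):
+
+```c++
+// One Evaluate() call; clamp at zero in case the point is inside the ball.
+template<typename VecType>
+double MinDistance(VecType& point)
+{
+  return std::max(metric.Evaluate(point, ctr) - r, 0.0);
+}
+```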
+
+The `RangeDistance()` function allows a way for both bounds to be calculated at
+once.  It is possible to implement this as a call to `MinDistance()` followed by
+a call to `MaxDistance()`, but this may incur more metric `Evaluate()` calls
+than necessary.  Often calculating both bounds at once can be more efficient and
+can be done with fewer `Evaluate()` calls than calling both `MinDistance()` and
+`MaxDistance()`.
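+
+Continuing the ball-bound sketch from above, both bounds come from a single
+`Evaluate()` call:
+
+```c++
+template<typename VecType>
+Range RangeDistance(VecType& point)
+{
+  const double centerDist = metric.Evaluate(point, ctr);
+  return Range(std::max(centerDist - r, 0.0), centerDist + r);
+}
+```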
+
+### Serialization
+
+The last functions that the `TreeType` API requires are for serialization.
+
+```c++
+// Serialize the tree (load from the given archive / save to the given
+// archive, depending on its type).
+template<typename Archive>
+void serialize(Archive& ar, const uint32_t version);
+
+protected:
+// A default constructor; only meant to be used by cereal.  This
+// must be protected so that cereal will work; it does not need
+// to return a valid tree.
+ExampleTree();
+
+// Friend access must be given for the default constructor.
+friend class cereal::access;
+```
+
+The specifics of the functionality required for the `serialize()` function are
+somewhat more involved.  The `serialize()` function
+will be called either when a tree is being saved to disk or loaded from disk.
+The `cereal` documentation is fairly comprehensive.
+
+An important note is that it is very difficult to use references with `cereal`,
+because `serialize()` may be called at any time during the object's lifetime,
+and references cannot be re-seated.  In general this will require the use of
+pointers, which then require manual memory management.  Therefore, be careful
+that `serialize()` (and the tree's destructor) properly handle memory
+management!
+
+## The TreeTraits trait class
+
+Some tree-based algorithms can specialize if the tree fulfills certain
+conditions.  For instance, if the regions represented by two sibling nodes
+cannot overlap, an algorithm may be able to perform a simpler computation.
+Based on this reasoning, the `TreeTraits` trait class (much like the
+`KernelTraits` class) exists in order to allow a tree to specify (via a `const
+static bool`) when these types of conditions are satisfied.  ***Note that a
+`TreeTraits` specialization is not required***, but may be helpful.
+
+The `TreeTraits` trait class is a template class that takes a `TreeType` as a
+parameter, and exposes `const static bool` values that depend on the tree.
+Setting these values is achieved by specialization.  The code below shows the
+default `TreeTraits` values (these are the values that will be used if no
+specialization is provided for a given `TreeType`).
+
+```c++
+template<typename TreeType>
+class TreeTraits
+{
+ public:
+  // This is true if the subspaces represented by the children of a node can
+  // overlap.
+  static const bool HasOverlappingChildren = true;
+
+  // This is true if Point(0) is the centroid of the node.
+  static const bool FirstPointIsCentroid = false;
+
+  // This is true if the points contained in the first child of a node
+  // (Child(0)) are also contained in that node.
+  static const bool HasSelfChildren = false;
+
+  // This is true if the tree rearranges points in the dataset when it is built.
+  static const bool RearrangesDataset = false;
+
+  // This is true if the tree always has only two children.
+  static const bool BinaryTree = false;
+};
+```
+
+An example specialization for the `KDTree` class is given below.  Note that
+`KDTree` is itself a template class (like every class satisfying the `TreeType`
+policy), so this is a partial specialization.
+
+```c++
+template<typename MetricType,
+         typename StatisticType,
+         typename MatType>
+class TreeTraits<KDTree<MetricType, StatisticType, MatType>>
+{
+ public:
+  // The regions represented by the two children of a node may not overlap.
+  static const bool HasOverlappingChildren = false;
+
+  // There is no guarantee that the first point of a node is the centroid.
+  static const bool FirstPointIsCentroid = false;
+
+  // Points are not contained at multiple levels (only at the leaves).
+  static const bool HasSelfChildren = false;
+
+  // Points are rearranged during the building of the tree.
+  static const bool RearrangesDataset = true;
+
+  // The tree is always binary.
+  static const bool BinaryTree = true;
+};
+```
+
+Currently, the five traits detailed above are the only traits available.  For
+more information, see the `TreeTraits` source code.
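+
+As a hedged sketch of how an algorithm might consume these traits (the
+function is illustrative):
+
+```c++
+template<typename TreeType>
+void Recurse(TreeType& node)
+{
+  if (TreeTraits<TreeType>::HasOverlappingChildren)
+  {
+    // Sibling regions may overlap, so both children must be considered.
+  }
+  else
+  {
+    // Sibling regions are disjoint, so tighter pruning is possible here.
+  }
+}
+```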
+
+## A list of trees in mlpack and more information
+
+mlpack contains several ready-to-use implementations of trees that satisfy the
+TreeType policy API:
+
+ - `KDTree`
+ - `MeanSplitKDTree`
+ - `BallTree`
+ - `MeanSplitBallTree`
+ - `RTree`
+ - `RStarTree`
+ - `StandardCoverTree`
+
+Often, these are template typedefs of more flexible tree classes:
+
+ - `BinarySpaceTree` -- binary trees, such as the KD-tree and ball tree
+ - `RectangleTree` -- the R tree and variants
+ - `CoverTree` -- the cover tree and variants
diff -pruN 3.4.2-7/doc/developer/version.md 4.0.1-1/doc/developer/version.md
--- 3.4.2-7/doc/developer/version.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/developer/version.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,25 @@
+# mlpack versions in code
+
+mlpack provides a couple of convenience macros and functions to get the version
+of mlpack.  More information (and straightforward code) can be found in
+`src/mlpack/core/util/version.hpp`.
+
+The following three macros provide major, minor, and patch versions of mlpack
+(i.e. for `mlpack-x.y.z`, `x` is the major version, `y` is the minor version,
+and `z` is the patch version):
+
+```c++
+MLPACK_VERSION_MAJOR
+MLPACK_VERSION_MINOR
+MLPACK_VERSION_PATCH
+```
+
+In addition, the function `mlpack::util::GetVersion()` returns the mlpack
+version as a string (for instance, `"mlpack 1.0.8"`).
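+
+A minimal sketch of both approaches:
+
+```c++
+#include <mlpack/core/util/version.hpp>
+#include <iostream>
+
+int main()
+{
+  // Print "x.y.z" using the individual macros...
+  std::cout << MLPACK_VERSION_MAJOR << "."
+            << MLPACK_VERSION_MINOR << "."
+            << MLPACK_VERSION_PATCH << std::endl;
+
+  // ...and the full version string via the convenience function.
+  std::cout << mlpack::util::GetVersion() << std::endl;
+}
+```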
+
+## mlpack command-line program versions
+
+Each mlpack command-line program supports the `--version` (or `-V`) option,
+which will print the version of mlpack used.  If the version is not an official
+release but instead from git, the version will be `mlpack git` (and will have a
+git revision SHA appended to `git`).
diff -pruN 3.4.2-7/doc/doxygen/extra-stylesheet.css 4.0.1-1/doc/doxygen/extra-stylesheet.css
--- 3.4.2-7/doc/doxygen/extra-stylesheet.css	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/doxygen/extra-stylesheet.css	1970-01-01 00:00:00.000000000 +0000
@@ -1,7 +0,0 @@
-/* Additional CSS styles for the html output */
-
-/* Fix the size of inline formulas */
-img.formulaInl {
-  vertical-align: middle;
-  height: 15pt;
-}
diff -pruN 3.4.2-7/doc/doxygen/footer.html 4.0.1-1/doc/doxygen/footer.html
--- 3.4.2-7/doc/doxygen/footer.html	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/doxygen/footer.html	1970-01-01 00:00:00.000000000 +0000
@@ -1,16 +0,0 @@
-<!-- start footer part -->
-<hr class="footer"/><address class="footer"><small>
-Generated by &#160;<a href="http://www.doxygen.org/index.html">
-<img class="footer" src="doxygen.png" alt="doxygen"/>
-</a> $doxygenversion
-</small></address>
-</body>
-<script type="text/javascript">
-var x = document.querySelectorAll("img.formulaDsp");
-var i;
-for (i = 0; i < x.length; i++)
-{
-  x[i].width = x[i].offsetWidth / 4;
-}
-</script>
-</html>
diff -pruN 3.4.2-7/doc/doxygen/stylesheet.css 4.0.1-1/doc/doxygen/stylesheet.css
--- 3.4.2-7/doc/doxygen/stylesheet.css	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/doxygen/stylesheet.css	1970-01-01 00:00:00.000000000 +0000
@@ -1,888 +0,0 @@
-/* The standard CSS for doxygen */
-
-body, table, div, p, dl {
-  font-family: Lucida Grande, Verdana, Geneva, Arial, sans-serif;
-  font-size: 12px;
-}
-
-/* @group Heading Levels */
-
-h1 {
-  font-size: 150%;
-  color: #ffffff;
-}
-
-.title {
-  font-size: 150%;
-  font-weight: bold;
-  margin: 10px 2px;
-  color: #ffffff;
-}
-
-h2 {
-  font-size: 120%;
-  color: #ffffff;
-}
-
-h3 {
-  font-size: 100%;
-  color: #ffffff;
-}
-
-dt {
-  font-weight: bold;
-}
-
-div.multicol {
-  -moz-column-gap: 1em;
-  -webkit-column-gap: 1em;
-  -moz-column-count: 3;
-  -webkit-column-count: 3;
-}
-
-p.startli, p.startdd, p.starttd {
-  margin-top: 2px;
-}
-
-p.endli {
-  margin-bottom: 0px;
-}
-
-p.enddd {
-  margin-bottom: 4px;
-}
-
-p.endtd {
-  margin-bottom: 2px;
-}
-
-/* @end */
-
-caption {
-  font-weight: bold;
-}
-
-span.legend {
-  font-size: 70%;
-  text-align: center;
-}
-
-h3.version {
-  font-size: 90%;
-  text-align: center;
-}
-
-div.qindex, div.navtab{
-  background-color: #000000;
-  border: 1px solid #333333;
-  text-align: center;
-  margin: 2px;
-  padding: 2px;
-}
-
-div.qindex, div.navpath {
-  width: 100%;
-  line-height: 140%;
-}
-
-div.navtab {
-  margin-right: 15px;
-}
-
-/* @group Link Styling */
-
-a {
-  color: #BB2222;
-  font-weight: normal;
-  text-decoration: none;
-}
-
-.contents a:visited {
-  color: #BB2222;
-}
-
-a:hover {
-  text-decoration: underline;
-}
-
-a.qindex {
-  font-weight: bold;
-}
-
-a.qindexHL {
-  font-weight: bold;
-  background-color: #9CAFD4;
-  color: #ffffff;
-  border: 1px double #869DCA;
-}
-
-.contents a.qindexHL:visited {
-  color: #ffffff;
-}
-
-a.el {
-  font-weight: bold;
-}
-
-a.elRef {
-
-}
-
-a.code {
-  color: #BB2222;
-}
-
-a.codeRef {
-  color: #BB2222;
-}
-
-/* @end */
-
-dl.el {
-  margin-left: -1cm;
-}
-
-.fragment {
-  font-family: monospace, fixed;
-  font-size: 105%;
-}
-
-pre.fragment {
-  border: 5px solid #1D1D1D;
-  background-color: #2D2D2D;
-  padding: 10px 10px 10px 10px;
-  page-break-before: avoid;
-  overflow: auto;
-  word-wrap: break-word;
-  font-size: 90%;
-  margin-left: 1.75em;
-  margin-right: 1.75em;
-  margin-top: 1em;
-  margin-bottom: 1em;
-  color: #ffffff;
-}
-
-div.ah {
-  background-color: black;
-  font-weight: bold;
-  color: #ffffff;
-  margin-bottom: 3px;
-  margin-top: 3px;
-  padding: 0.2em;
-  border: solid thin #333;
-}
-
-div.groupHeader {
-  margin-left: 16px;
-  margin-top: 12px;
-  font-weight: bold;
-}
-
-div.groupText {
-  margin-left: 16px;
-  font-style: italic;
-}
-
-body {
-  background: #000000;
-  color: #808080;
-  margin: 0;
-}
-
-div.contents {
-  margin-top: 10px;
-  margin-left: 10px;
-  margin-right: 5px;
-}
-
-td.indexkey {
-  background-color: #000000;
-  font-weight: bold;
-  border: 1px solid #333333;
-  margin: 2px 0px 2px 0;
-  padding: 2px 10px;
-}
-
-td.indexvalue {
-  background-color: #000000;
-  border: 1px solid #333333;
-  padding: 2px 10px;
-  margin: 2px 0px;
-}
-
-tr.memlist {
-  background-color: #EEF1F7;
-}
-
-p.formulaDsp {
-  text-align: center;
-}
-
-img.formulaDsp {
-
-}
-
-img.formulaInl {
-  vertical-align: middle;
-}
-
-div.center {
-  text-align: center;
-  margin-top: 0px;
-  margin-bottom: 0px;
-  padding: 0px;
-}
-
-div.center img {
-  border: 0px;
-}
-
-address.footer {
-  text-align: right;
-  padding-right: 12px;
-}
-
-img.footer {
-  border: 0px;
-  vertical-align: middle;
-}
-
-/* @group Code Colorization */
-
-span.keyword {
-  color: #FF0000;
-}
-
-span.keywordtype {
-  color: #FF00FF;
-}
-
-span.keywordflow {
-  color: #800080;
-}
-
-span.comment {
-  color: #00FFFF;
-}
-
-span.preprocessor {
-  color: #808080;
-}
-
-span.stringliteral {
-  color: #FFFF00;
-}
-
-span.charliteral {
-color: #FFFF00;
-}
-
-span.vhdldigit {
-  color: #FFFF00;
-}
-
-span.vhdlchar {
-  color: #FFFF00;
-}
-
-span.vhdlkeyword {
-  color: #FF0000;
-}
-
-span.vhdllogic {
-  color: #FF0000;
-}
-
-/* @end */
-
-/*
-   .search {
-color: #003399;
-font-weight: bold;
-}
-
-form.search {
-margin-bottom: 0px;
-margin-top: 0px;
-}
-
-input.search {
-font-size: 75%;
-color: #000080;
-font-weight: normal;
-background-color: #e8eef2;
-}
- */
-
-td.tiny {
-  font-size: 75%;
-}
-
-.dirtab {
-  padding: 4px;
-  border-collapse: collapse;
-  border: 1px solid #A3B4D7;
-}
-
-th.dirtab {
-  background: #EBEFF6;
-  font-weight: bold;
-}
-
-hr {
-  height: 0px;
-  border: none;
-  border-top: 3px solid #BB2222;
-}
-
-hr.footer {
-  height: 1px;
-}
-
-/* @group Member Descriptions */
-
-table.memberdecls {
-  border-spacing: 0px;
-  padding: 0px;
-}
-
-.mdescLeft, .mdescRight,
-.memItemLeft, .memItemRight,
-.memTemplItemLeft, .memTemplItemRight, .memTemplParams {
-  background-color: #000000;
-  border: none;
-  margin: 4px;
-  padding: 1px 0 0 8px;
-}
-
-.mdescLeft, .mdescRight {
-  padding: 0px 8px 4px 8px;
-  color: #555;
-}
-
-.memItemLeft, .memItemRight, .memTemplParams {
-  border-top: 1px solid #333333;
-}
-
-.memItemLeft, .memTemplItemLeft {
-  white-space: nowrap;
-}
-
-.memItemRight {
-  width: 100%;
-}
-
-.memTemplParams {
-  color: #FFFFFF;
-  white-space: nowrap;
-}
-
-/* @end */
-
-/* @group Member Details */
-
-/* Styles for detailed member documentation */
-
-.memtemplate {
-  color: #FFFFFF;
-  font-weight: bold;
-  margin-left: 8px;
-  font-family: Andalo Mono, Courier New, Courier, Lucida Typewrite, fixed;
-}
-
-.memnav {
-  background-color: #000000;
-  border: 1px solid #333333;
-  text-align: center;
-  margin: 2px;
-  margin-right: 15px;
-  padding: 2px;
-}
-
-.mempage {
-  width: 100%;
-}
-
-.memitem {
-  padding: 0;
-  margin-bottom: 10px;
-  margin-right: 5px;
-}
-
-.memname {
-  white-space: nowrap;
-  font-weight: bold;
-  margin-left: 6px;
-  font-family: Andale Mono, Courier New, Courier, Lucida Typewriter, fixed;
-}
-
-.memproto {
-  border-top: 1px solid #808080;
-  border-left: 1px solid #808080;
-  border-right: 1px solid #808080;
-  padding: 6px 0px 6px 0px;
-  color: #FFFFFF;
-  font-weight: bold;
-}
-
-.memdoc {
-  border-bottom: 1px solid #808080;
-  border-left: 1px solid #808080;
-  border-right: 1px solid #808080;
-  border-top: 1px solid #333333;
-  padding: 2px 5px;
-}
-
-.paramkey {
-  text-align: right;
-}
-
-.paramtype {
-  white-space: nowrap;
-  color: #808080;
-  font-family: Andale Mono, Courier New, Courier, Lucida Typewriter, fixed;
-}
-
-.paramname {
-  color: #BB2222;
-  white-space: nowrap;
-  font-family: Andale Mono, Courier New, Courier, Lucida Typewriter, fixed;
-}
-
-.paramname em {
-  font-style: normal;
-}
-
-.params, .retval, .exception, .tparams {
-  border-spacing: 6px 2px;
-}
-
-.params .paramname, .retval .paramname {
-  font-weight: bold;
-  vertical-align: top;
-}
-
-.params .paramtype {
-  font-style: italic;
-  vertical-align: top;
-}
-
-.params .paramdir {
-  font-family: "courier new",courier,monospace;
-  vertical-align: top;
-}
-
-/* @end */
-
-/* @group Directory (tree) */
-
-/* for the tree view */
-
-.ftvtree {
-  font-family: sans-serif;
-  margin: 0px;
-}
-
-/* these are for tree view when used as main index */
-
-.directory {
-  font-size: 9pt;
-  font-weight: bold;
-  margin: 5px;
-}
-
-.directory h3 {
-  margin: 0px;
-  margin-top: 1em;
-  font-size: 11pt;
-}
-
-/*
-   The following two styles can be used to replace the root node title
-   with an image of your choice.  Simply uncomment the next two styles,
-   specify the name of your image and be sure to set 'height' to the
-   proper pixel height of your image.
- */
-
-/*
-   .directory h3.swap {
-height: 61px;
-background-repeat: no-repeat;
-background-image: url("yourimage.gif");
-}
-.directory h3.swap span {
-display: none;
-}
- */
-
-.directory > h3 {
-  margin-top: 0;
-}
-
-.directory p {
-  margin: 0px;
-  white-space: nowrap;
-}
-
-.directory div {
-  display: none;
-  margin: 0px;
-}
-
-.directory img {
-  vertical-align: -30%;
-}
-
-/* these are for tree view when not used as main index */
-
-.directory-alt {
-  font-size: 100%;
-  font-weight: bold;
-}
-
-.directory-alt h3 {
-  margin: 0px;
-  margin-top: 1em;
-  font-size: 11pt;
-}
-
-.directory-alt > h3 {
-  margin-top: 0;
-}
-
-.directory-alt p {
-  margin: 0px;
-  white-space: nowrap;
-}
-
-.directory-alt div {
-  display: none;
-  margin: 0px;
-}
-
-.directory-alt img {
-  vertical-align: -30%;
-}
-
-/* @end */
-
-div.dynheader {
-  margin-top: 8px;
-}
-
-address {
-  font-style: normal;
-  color: #2A3D61;
-}
-
-table.doxtable {
-  border-collapse: collapse;
-}
-
-table.doxtable td, table.doxtable th {
-  border: 1px solid #2D4068;
-  padding: 3px 7px 2px;
-}
-
-table.doxtable th {
-  background-color: #374F7F;
-  color: #FFFFFF;
-  font-size: 110%;
-  padding-bottom: 4px;
-  padding-top: 5px;
-  text-align: left;
-}
-
-.tabsearch {
-  top: 0px;
-  left: 10px;
-  height: 36px;
-  background-image: url('tab_b.png');
-  z-index: 101;
-  overflow: hidden;
-  font-size: 13px;
-}
-
-.navpath ul {
-  font-size: 11px;
-  background: #000000;
-  color: #8AA0CC;
-  border-bottom: 1px solid #333333;
-  overflow: hidden;
-  margin: 0px;
-  padding-top: 0.25em;
-  padding-bottom: 0.25em;
-  padding-left: 0.5em;
-  padding-right: 0;
-  border-left: 1px solid #333333;
-}
-
-.navpath li {
-  list-style-type: none;
-  float: left;
-  padding-right: 0.5em;
-  color: #364D7C;
-  border-right: 1px solid #333333;
-  padding-left: 0.5em;
-}
-
-.navpath li.navelem a {
-  display: block;
-  text-decoration: none;
-  outline: none;
-}
-
-.navpath li.navelem a:hover {
-  color:#FFFFFF;
-}
-
-.navpath li.footer {
-  list-style-type: none;
-  float: right;
-  padding-left: 10px;
-  padding-right: 15px;
-  background-image: none;
-  background-repeat: no-repeat;
-  background-position: right;
-  color: #364D7C;
-  font-size: 8pt;
-}
-
-div.summary {
-  float: right;
-  font-size: 8pt;
-  padding-right: 5px;
-  width: 50%;
-  text-align: right;
-}
-
-div.summary a {
-  white-space: nowrap;
-}
-
-div.ingroups {
-  font-size: 8pt;
-  padding-left: 5px;
-  width: 50%;
-  text-align: left;
-}
-
-div.ingroups a {
-  white-space: nowrap;
-}
-
-div.header {
-  background-color: #000000;
-  margin: 0px;
-  border-bottom: 1px solid #333333;
-}
-
-div.headertitle {
-  padding: 5px 5px 5px 10px;
-}
-
-dl {
-  padding: 0 0 0 10px;
-}
-
-dl.note, dl.warning, dl.attention, dl.pre, dl.post, dl.invariant,
-dl.deprecated, dl.todo, dl.test, dl.bug {
-  border-left: 4px solid;
-  padding: 0 0 0 6px;
-}
-
-dl.note {
-  border-color: #D0C000;
-}
-
-dl.warning, dl.attention {
-  border-color: #FF0000;
-}
-
-dl.pre, dl.post, dl.invariant {
-  border-color: #00D000;
-}
-
-dl.deprecated {
-  border-color: #505050;
-}
-
-dl.todo {
-  border-color: #00C0E0;
-}
-
-dl.test {
-  border-color: #3030E0;
-}
-
-dl.bug {
-  border-color: #C08050;
-}
-
-#projectlogo {
-  text-align: center;
-  vertical-align: bottom;
-  border-collapse: separate;
-}
-
-#projectlogo img {
-  border: 0px none;
-}
-
-#projectname {
-  font: 300% Tahoma, Arial, sans-serif;
-  margin: 0px;
-  padding: 2px 0px;
-}
-
-#projectbrief {
-  font: 120% Tahoma, Arial, sans-serif;
-  margin: 0px;
-  padding: 0px;
-}
-
-#projectnumber {
-  font: 50% Tahoma, Arial,sans-serif;
-  margin: 0px;
-  padding: 0px;
-}
-
-#titlearea {
-  padding: 0px;
-  margin: 0px;
-  width: 100%;
-  border-bottom: 1px solid #808080;
-}
-
-.image {
-  text-align: center;
-}
-
-.dotgraph {
-  text-align: center;
-}
-
-.mscgraph {
-  text-align: center;
-}
-
-.caption {
-  font-weight: bold;
-}
-
-/** tab list at top of page */
-.tabs, .tabs2, .tabs3 {
-  background-image: none !important;
-  background: #000000;
-  border-left: 1px solid #333333;
-  border-right: 1px solid #333333;
-  border-bottom: 1px solid #333333;
-  min-height: 1.5em;
-}
-
-.tablist li {
-  background-image: none !important;
-  background: #000000;
-  border-right: 1px solid #333333;
-  height: auto !important;
-  padding-bottom: 0.25em;
-  padding-top: 0.25em;
-  line-height: 1em !important;
-}
-
-.tablist li.current {
-  background: #BB2222;
-}
-
-.tablist li.current a {
-  background-image: none !important;
-  text-shadow: none;
-  color: #ffffff;
-}
-
-.tablist a {
-  background-image: none !important;
-  text-shadow: none;
-  color: #ffffff;
-  font-weight: bold;
-}
-
-.tablist li:hover {
-  background: #333333;
-}
-
-.tablist li.current:hover {
-  background: #BB2222 !important;
-}
-
-/***
- * For trac-doxygen; these rules won't apply otherwise.
- */
-div.tabs span {
-  background-image: none !important;
-  background: transparent !important;
-  height: auto !important;
-  padding-bottom: 0.25em;
-  padding-top: 0.25em;
-  line-height: 1em !important;
-}
-
-div.tabs a {
-  background-image: none !important;
-  background: transparent !important;
-  border-bottom: none !important;
-  font-size: 100% !important;
-}
-
-div.tabs span {
-  padding-bottom: 0.25em;
-  padding-top: 0.25em;
-  color: #ffffff !important;
-}
-
-div.tabs li:hover {
-  background: #333333;
-}
-
-div.tabs li.current:hover {
-  background: #BB2222 !important;
-}
-
-div.tabs li.current {
-  background: #BB2222 !important;
-}
-
-div.tabs li {
-  border-right: 1px solid #333333;
-}
-
-div.tabs ul {
-  display: inline;
-  font-size: 100%;
-  padding-top: 0em;
-}
-
-/* I want the menus to display directly below the Trac menu. */
-#content {
-  padding-top: 0px;
-  margin-top: 0px;
-}
-
-div.tabs {
-  margin-bottom: 0px;
-  background-image: none;
-}
-
-div.nav {
-  border-bottom: 1px solid #808080;
-}
-
-/*** Fix the weird size of the menus */
-#mainnav {
-  font-size: 100% !important;
-}
-
-div#main div.nav {
-  min-height: 1em !important; /* We must have the right height for the menus. */
-  border-bottom: 1px solid #333333; /* The plugin was giving a blue border. */
-}
diff -pruN 3.4.2-7/doc/examples/sample-ml-app/README.txt 4.0.1-1/doc/examples/sample-ml-app/README.txt
--- 3.4.2-7/doc/examples/sample-ml-app/README.txt	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/examples/sample-ml-app/README.txt	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,39 @@
+This directory contains a Visual Studio solution showing the use of mlpack in a
+Visual Studio C++ project.
+
+However, you will need to set up your environment correctly first---or modify
+the project properties accordingly---in order to build and run the example.
+
+In short, you must download mlpack and four libraries, and install the sources
+and library files into C:\mlpack\.
+
+ * OpenBLAS: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
+   Download the .zip, and extract it into C:\mlpack\openblas-0.3.21\
+
+ * Armadillo: https://mlpack.org/files/armadillo-11.4.1.tar.gz
+   Download the .tar.gz, and extract it into C:\mlpack\armadillo-11.4.1\; note
+   that you may need to use a program such as 7Zip (https://www.7-zip.org/) to
+   unpack this archive.
+
+ * Cereal: https://github.com/USCiLab/cereal/archive/refs/tags/v1.3.2.zip
+   Download the .zip, and extract it into C:\mlpack\cereal-1.3.2\
+
+ * ensmallen: https://ensmallen.org/files/ensmallen-2.19.0.tar.gz
+   Download the .tar.gz, and extract it into C:\mlpack\ensmallen-2.19.0\
+
+Now, install mlpack into C:\mlpack\mlpack-4.0.0\.  If you downloaded the mlpack
+source, you can either use the Windows build guide (see
+doc/user/build_windows.md) to build and install, or, since mlpack is
+header-only, copy the src/ directory to C:\mlpack\mlpack-4.0.0\ and rename it
+"include" (so there will now be a directory C:\mlpack\mlpack-4.0.0\include\,
+which contains only base.hpp and the mlpack/ subdirectory).
+
+Alternately, if you downloaded the Windows MSI installer, you can install to
+C:\mlpack\mlpack-4.0.0\.
+
+Once all of that setup is done, the example should compile as-is.
+
+If your environment is different, or you have installed the dependencies to a
+different directory, just open the solution Properties and adjust the paths in
+Configuration Properties -> C/C++ -> General -> Additional Include Directories,
+and Configuration Properties -> Linker -> Input.
Binary files 3.4.2-7/doc/examples/sample-ml-app/sample-ml-app/sample-ml-app.cpp and 4.0.1-1/doc/examples/sample-ml-app/sample-ml-app/sample-ml-app.cpp differ
diff -pruN 3.4.2-7/doc/examples/sample-ml-app/sample-ml-app/sample-ml-app.vcxproj 4.0.1-1/doc/examples/sample-ml-app/sample-ml-app/sample-ml-app.vcxproj
--- 3.4.2-7/doc/examples/sample-ml-app/sample-ml-app/sample-ml-app.vcxproj	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/examples/sample-ml-app/sample-ml-app/sample-ml-app.vcxproj	2022-12-29 15:40:18.000000000 +0000
@@ -1,172 +1,174 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|Win32">
-      <Configuration>Debug</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|Win32">
-      <Configuration>Release</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <VCProjectVersion>15.0</VCProjectVersion>
-    <ProjectGuid>{1D2743FF-3795-43A4-BA42-5942256630E7}</ProjectGuid>
-    <Keyword>Win32Proj</Keyword>
-    <RootNamespace>samplemlapp</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-  </ImportGroup>
-  <ImportGroup Label="Shared">
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <ClCompile>
-      <PrecompiledHeader>Use</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <ClCompile>
-      <PrecompiledHeader>Use</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>false</ConformanceMode>
-      <AdditionalIncludeDirectories>C:\boost\boost_1_66_0;C:\mlpack\armadillo-8.500.1\include;C:\mlpack\mlpack-3.4.2\build\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>C:\mlpack\mlpack-3.4.2\build\Debug\mlpack.lib;C:\boost\boost_1_66_0\lib64-msvc-14.1\libboost_serialization-vc141-mt-gd-x64-1_66.lib;%(AdditionalDependencies)</AdditionalDependencies>
-    </Link>
-    <PostBuildEvent>
-      <Command>xcopy /y "C:\mlpack\mlpack-3.4.2\build\Debug\mlpack.dll" $(OutDir)
-xcopy /y "C:\mlpack\mlpack-3.4.2\packages\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll" $(OutDir)
-xcopy /y "$(ProjectDir)..\..\..\..\src\mlpack\tests\data\german.csv" "$(ProjectDir)data\german.csv*"</Command>
-    </PostBuildEvent>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <ClCompile>
-      <PrecompiledHeader>Use</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <ClCompile>
-      <PrecompiledHeader>Use</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemGroup>
-    <ClInclude Include="stdafx.h" />
-    <ClInclude Include="targetver.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="sample-ml-app.cpp" />
-    <ClCompile Include="stdafx.cpp">
-      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
-      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
-      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
-      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
-    </ClCompile>
-  </ItemGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-  </ImportGroup>
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>15.0</VCProjectVersion>
+    <ProjectGuid>{1D2743FF-3795-43A4-BA42-5942256630E7}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>samplemlapp</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>Default</ConformanceMode>
+      <AdditionalIncludeDirectories>C:\mlpack\armadillo-11.4.1\include;C:\mlpack\mlpack-4.0.1\include\;C:\mlpack\cereal-1.3.2\include;C:\mlpack\ensmallen-2.19.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <OpenMPSupport>false</OpenMPSupport>
+      <AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>C:\mlpack\openblas-0.3.21\lib\libopenblas.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <PostBuildEvent>
+      <Command>xcopy /y "C:\mlpack\openblas-0.3.21\bin\*.dll" $(OutDir)
+xcopy /y "$(ProjectDir)..\..\..\..\src\mlpack\tests\data\german.csv" "$(ProjectDir)data\german.csv*"</Command>
+    </PostBuildEvent>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="stdafx.h" />
+    <ClInclude Include="targetver.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="sample-ml-app.cpp" />
+    <ClCompile Include="stdafx.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
 </Project>
Binary files 3.4.2-7/doc/examples/sample-ml-app/sample-ml-app/stdafx.h and 4.0.1-1/doc/examples/sample-ml-app/sample-ml-app/stdafx.h differ
diff -pruN 3.4.2-7/doc/guide/bindings.hpp 4.0.1-1/doc/guide/bindings.hpp
--- 3.4.2-7/doc/guide/bindings.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/bindings.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,1265 +0,0 @@
-/*! @page bindings mlpack automatic bindings to other languages
-
-@section bindings_overview Overview
-
-mlpack has a system to automatically generate bindings to other languages, such
-as Python and command-line programs, and it can be extended to additional
-languages with relative ease.  The maintenance burden of this system is low,
-and it is designed in such a way that the bindings produced are always up to
-date across languages and with the mlpack library itself.
-
-This document describes the full functioning of the system, and is a good place
-to start for someone who wishes to understand the system in order to contribute
-a new binding language, adapt the system for use in their own project, or
-simply see how the sausage is made.
-
-The document is split into several sections:
-
- - @ref bindings_intro
- - @ref bindings_code
- - @ref bindings_general
-    - @ref bindings_general_program_doc
-    - @ref bindings_general_define_params
-    - @ref bindings_general_functions
-    - @ref bindings_general_more
- - @ref bindings_structure
- - @ref bindings_cli
-    - @ref bindings_cli_mlpack_main
-    - @ref bindings_cli_matrix
-    - @ref bindings_cli_parsing
- - @ref bindings_python
-    - @ref bindings_python_matrix
-    - @ref bindings_python_model
-    - @ref bindings_python_setup_py
-    - @ref bindings_python_build_pyx
-    - @ref bindings_python_testing
- - @ref bindings_new
-
-@section bindings_intro Introduction
-
-C++ is not the most popular language on the planet, and it (unfortunately) can
-scare many away with its ultra-verbose error messages, confusing template rules,
-and complex metaprogramming techniques.  Most practitioners of machine learning
-tend to avoid writing native C++ and instead prefer other languages---probably
-most notably Python.
-
-In the case of Python, many projects will use tools like SWIG
-(http://www.swig.org/) to automatically generate bindings, or they might
-hand-write Cython.  The same types of strategies may be used for other
-languages; hand-written MEX files may be used for MATLAB, hand-written Rcpp
-bindings might be used for R bindings, and so forth.
-
-However, these approaches have a fundamental flaw: the hand-written bindings
-must be maintained, and risk going out of date as the rest of the library
-changes or new functionality is added.  This incurs a maintenance burden: each
-major change to the library means that someone must update the bindings and test
-that they are still working.  mlpack is not prepared to handle this maintenance
-workload; therefore an alternate solution is needed.
-
-At the time of the design of this system, mlpack shipped headers for a C++
-library as well as many (~40) hand-written command-line programs that used the
-mlpack::IO object to manage command-line arguments.  These programs all had
-similar structure, and could be logically split into three sections:
-
- - parse the input options supplied by the user
- - run the machine learning algorithm
- - prepare the output to return to the user
-
-The user might interface with this command-line program like the following:
-
-@code
-$ mlpack_knn -r reference.csv -q query.csv -k 3 -d d.csv -n n.csv
-@endcode
-
-That is, they would pass a number of input options---some were numeric values
-(like @c -k @c 3 ); some were filenames (like @c -r @c reference.csv ); and a
-few were of other types.  Therefore, the first stage of the program---parsing
-input options---would be handled by reading the command line and loading any
-input matrices.  Preparing the output, which usually consists of data matrices
-(e.g. @c -d @c d.csv ), involves saving the matrix returned by the algorithm to
-the user's desired file.
-
-Ideally, any binding to any language would have this same structure, and the
-actual "run the machine learning algorithm" code could be identical.  For
-MATLAB, for instance, we would not need to read the file @c reference.csv but
-instead the user would simply pass their data matrix as an argument.  So each
-input and output parameter would need to be handled differently, but the
-algorithm could be run identically across all bindings.
-
-Therefore, design of an automatically-generated binding system would simply
-involve generating the boilerplate code necessary to parse input options for a
-given language, and to return output options to a user.
-
-@section bindings_code Writing code that can be turned into a binding
-
-This section details what a binding file might actually look like.  It is good
-to have this API in mind when reading the following sections.
-
-Each mlpack binding is typically contained in the @c src/mlpack/methods/ folder
-corresponding to a given machine learning algorithm, with the suffix
-@c _main.cpp ; so an example is @c src/mlpack/methods/pca/pca_main.cpp .
-
-These files have roughly two parts:
-
- - definition of the input and output parameters with @c PARAM macros
- - implementation of @c mlpackMain(), which is the actual machine learning code
-
-Here is a simple example file:
-
-@code
-// This is a stripped version of mean_shift_main.cpp.
-#include <mlpack/prereqs.hpp>
-#include <mlpack/core/util/cli.hpp>
-#include <mlpack/core/util/mlpack_main.hpp>
-
-#include <mlpack/core/kernels/gaussian_kernel.hpp>
-#include "mean_shift.hpp"
-
-using namespace mlpack;
-using namespace mlpack::meanshift;
-using namespace mlpack::kernel;
-using namespace std;
-
-// Define the help text for the program.  The PRINT_PARAM_STRING() macro is
-// used to print the name of a parameter as seen in the binding type that is
-// being used, the PRINT_DATASET() macro prints the name of an example dataset,
-// and the PRINT_CALL() macro generates a sample invocation of the program in
-// the language of the binding type that is being used.  Note that the macros
-// must have + on either side of them.  We provide some extra references with
-// the "BINDING_SEE_ALSO()" macro, which is used to generate documentation for
-// the website.
-
-// Program Name.
-BINDING_NAME("Mean Shift Clustering");
-
-// Short description.
-BINDING_SHORT_DESC(
-    "A fast implementation of mean-shift clustering using dual-tree range "
-    "search.  Given a dataset, this uses the mean shift algorithm to produce "
-    "and return a clustering of the data.");
-
-// Long description.
-BINDING_LONG_DESC(
-    "This program performs mean shift clustering on the given dataset, storing "
-    "the learned cluster assignments either as a column of labels in the input "
-    "dataset or separately."
-    "\n\n"
-    "The input dataset should be specified with the " +
-    PRINT_PARAM_STRING("input") + " parameter, and the radius used for search"
-    " can be specified with the " + PRINT_PARAM_STRING("radius") + " "
-    "parameter.  The maximum number of iterations before algorithm termination "
-    "is controlled with the " + PRINT_PARAM_STRING("max_iterations") + " "
-    "parameter."
-    "\n\n"
-    "The output labels may be saved with the " + PRINT_PARAM_STRING("output") +
-    " output parameter and the centroids of each cluster may be saved with the"
-    " " + PRINT_PARAM_STRING("centroid") + " output parameter.");
-
-// Example.
-BINDING_EXAMPLE(
-    "For example, to run mean shift clustering on the dataset " +
-    PRINT_DATASET("data") + " and store the centroids to " +
-    PRINT_DATASET("centroids") + ", the following command may be used: "
-    "\n\n" +
-    PRINT_CALL("mean_shift", "input", "data", "centroid", "centroids"));
-
-// See also...
-BINDING_SEE_ALSO("@kmeans", "#kmeans");
-BINDING_SEE_ALSO("@dbscan", "#dbscan");
-BINDING_SEE_ALSO("Mean shift on Wikipedia",
-        "https://en.wikipedia.org/wiki/Mean_shift");
-BINDING_SEE_ALSO("Mean Shift, Mode Seeking, and Clustering (pdf)",
-        "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.510.1222"
-        "&rep=rep1&type=pdf");
-BINDING_SEE_ALSO("mlpack::mean_shift::MeanShift C++ class documentation",
-        "@doxygen/classmlpack_1_1meanshift_1_1MeanShift.html");
-
-// Define parameters for the executable.
-
-// Required option: the user must give us a matrix.
-PARAM_MATRIX_IN_REQ("input", "Input dataset to perform clustering on.", "i");
-
-// Output options: the user can save the output matrix of labels and/or the
-// centroids.
-PARAM_UCOL_OUT("output", "Matrix to write output labels to.", "o");
-PARAM_MATRIX_OUT("centroid", "If specified, the centroids of each cluster will "
-    "be written to the given matrix.", "C");
-
-// Mean shift configuration options.
-PARAM_INT_IN("max_iterations", "Maximum number of iterations before mean shift "
-    "terminates.", "m", 1000);
-PARAM_DOUBLE_IN("radius", "If the distance between two centroids is less than "
-    "the given radius, one will be removed.  A radius of 0 or less means an "
-    "estimate will be calculated and used for the radius.", "r", 0);
-
-void mlpackMain()
-{
-  // Process the parameters that the user passed.
-  const double radius = IO::GetParam<double>("radius");
-  const int maxIterations = IO::GetParam<int>("max_iterations");
-
-  if (maxIterations < 0)
-  {
-    Log::Fatal << "Invalid value for maximum iterations (" << maxIterations <<
-        ")! Must be greater than or equal to 0." << endl;
-  }
-
-  // Warn, if the user did not specify that they wanted any output.
-  if (!IO::HasParam("output") && !IO::HasParam("centroid"))
-  {
-    Log::Warn << "--output_file, --in_place, and --centroid_file are not set; "
-        << "no results will be saved." << endl;
-  }
-
-  arma::mat dataset = std::move(IO::GetParam<arma::mat>("input"));
-  arma::mat centroids;
-  arma::Col<size_t> assignments;
-
-  // Prepare and run the actual algorithm.
-  MeanShift<> meanShift(radius, maxIterations);
-
-  Timer::Start("clustering");
-  Log::Info << "Performing mean shift clustering..." << endl;
-  meanShift.Cluster(dataset, assignments, centroids);
-  Timer::Stop("clustering");
-
-  Log::Info << "Found " << centroids.n_cols << " centroids." << endl;
-  if (radius <= 0.0)
-    Log::Info << "Estimated radius was " << meanShift.Radius() << ".\n";
-
-  // Should we give the user the output matrix?
-  if (IO::HasParam("output"))
-    IO::GetParam<arma::Col<size_t>>("output") = std::move(assignments);
-
-  // Should we give the user the centroid matrix?
-  if (IO::HasParam("centroid"))
-    IO::GetParam<arma::mat>("centroid") = std::move(centroids);
-}
-@endcode
-
-We can see that we have defined the basic program information in the
-@c BINDING_NAME(), @c BINDING_SHORT_DESC(), @c BINDING_LONG_DESC(),
-@c BINDING_EXAMPLE() and @c BINDING_SEE_ALSO() macros.  This is, for instance,
-what is displayed to describe the binding if the user passed the
-<tt>\--help</tt> option for a command-line program.
-
-Then, we define five parameters, three input and two output, that define the
-data and options that the mean shift clustering will function on.  These
-parameters are defined with the @c PARAM macros, of which there are many.  The
-names of these macros specify the type, whether the parameter is required, and
-whether the parameter is input or output.  Some examples:
-
- - @c PARAM_STRING_IN() -- a string-type input parameter
- - @c PARAM_MATRIX_OUT() -- a matrix-type output parameter
- - @c PARAM_DOUBLE_IN_REQ() -- a required double-type input parameter
- - @c PARAM_UMATRIX_IN() -- an unsigned matrix-type input parameter
- - @c PARAM_MODEL_IN() -- a serializable model-type input parameter
-
-Note that each of these macros may have slightly different syntax.  See the
-links above for further documentation.
-
-In order to write a new binding, then, you simply must write @c BINDING_NAME(),
-@c BINDING_SHORT_DESC(), @c BINDING_LONG_DESC(), @c BINDING_EXAMPLE() and
-@c BINDING_SEE_ALSO() definitions of the program with some documentation, define
-the input and output parameters as @c PARAM macros, and then write an
-@c mlpackMain() function that actually performs the functionality of the binding.
-Inside of @c mlpackMain():
-
- - All input parameters are accessible through @c IO::GetParam<type>("name").
- - All output parameters should be set by the end of the function with the
-      @c IO::GetParam<type>("name") method.
-
-Then, assuming that your program is saved in the file @c program_name_main.cpp,
-generating bindings for other languages is a simple addition to the
-@c CMakeLists.txt file:
-
-@code
-add_cli_executable(program_name)
-add_python_binding(program_name)
-add_markdown_docs(program_name "cli;python" "category")
-@endcode
-
-In this example, @c add_markdown_docs() will generate documentation that is
-typically used to build the website.  The "category" parameter should be one of
-the categories in @c src/mlpack/bindings/markdown/MarkdownCategories.cmake.
-
-@section bindings_general How to write mlpack bindings
-
-This section describes the general structure of the @c IO code and how one
-might write a new binding for mlpack.  After reading this section it should be
-relatively clear how one could use the @c IO functionality along with CMake to
-add a binding for a new mlpack machine learning method.  If it is not clear,
-then the examples in the following sections should clarify.
-
-@subsection bindings_general_program_doc Documenting a program with
-@c BINDING_NAME(), @c BINDING_SHORT_DESC(), @c BINDING_LONG_DESC(),
-@c BINDING_EXAMPLE() and @c BINDING_SEE_ALSO().
-
-Any mlpack program should be documented with the @c BINDING_NAME(),
-@c BINDING_SHORT_DESC(), @c BINDING_LONG_DESC(), @c BINDING_EXAMPLE() and
-@c BINDING_SEE_ALSO() macros, which are available from the
-@c <mlpack/core/util/mlpack_main.hpp> header.  The macros
-are of the form
-
-@code
-BINDING_NAME("program name");
-BINDING_SHORT_DESC("This is a short, two-sentence description of what the program does.");
-BINDING_LONG_DESC("This is a long description of what the program does."
-    " It might be many lines long and have lots of details about different options.");
-BINDING_EXAMPLE("This contains one example for this particular binding.\n" +
-    PROGRAM_CALL(...));
-BINDING_EXAMPLE("This contains another example for this particular binding.\n" +
-    PROGRAM_CALL(...));
-// There could be many of these "see alsos".
-BINDING_SEE_ALSO("https://en.wikipedia.org/wiki/Machine_learning");
-@endcode
-
-The short documentation should be two sentences indicating what the program
-implements and does, and a quick overview of how it can be used and what it
-should be used for.  When writing new short documentation, it is a good idea to
-take a look at the existing documentation to get an idea of the general format.
-
-For the "see also" section, you can specify as many @c SEE_ALSO() calls as you
-see fit.  These are links used at the "see also" section of the website
-documentation for each binding, and it's very important that relevant links are
-provided (also to other bindings).  See the @c SEE_ALSO() documentation for more
-details.
-
-Although it is possible to provide very short documentation, it is certainly
-better to provide a long description including
-
- - what the program does
- - a basic overview of what input and output parameters the program has
- - at least one example invocation
-
-Examples are very important, and are probably what most users are going to
-immediately search for, instead of taking a long time to read and carefully
-consider all of the written documentation.
-
-However, it is difficult to write language-agnostic documentation.  For
-instance, in a command-line program, an output parameter '\--output_file' would
-be specified on the command line as an input parameter, but in Python, the
-output parameter 'output' is simply returned from the call to the Python
-function.  Therefore, we must be careful how our documentation refers to
-input and output parameters.  The following general guidelines can help:
-
- - Always refer to output parameters as "output parameters", which is a fairly
-   close term that can be interpreted to mean both "return values" for languages
-   like Python and MATLAB and also "arguments given on the command line" for
-   command line programs.
-
- - Use the provided @c PRINT_PARAM_STRING() macro to print the names of
-   parameters.  For instance, <tt>PRINT_PARAM_STRING("shuffle")</tt> will print
-   @c '\--shuffle' for a command line program and @c 'shuffle' for a Python
-   binding.  The @c PRINT_PARAM_STRING() macro also takes into account the type
-   of the parameter.
-
- - Use the provided @c PRINT_DATASET() and @c PRINT_MODEL() macros to introduce
-   example datasets or models, which can be useful when introducing an example
-   usage of the program.  So you could write
-   <tt>"to run with a dataset " + PRINT_DATASET("data") + "..."</tt>.
-
- - Use the provided @c PRINT_CALL() macro to print example invocations of the
-   program.  The first argument is the name of the program, and then the
-   following arguments should be the name of a parameter followed by the value
-   of that parameter.
-
- - Never mention files in the documentation---files are only relevant to
-   command-line programs.  Similarly, avoid mentioning anything
-   language-specific.
-
- - Remember that some languages give output through return values and some give
-   output using other input parameters.  So the right verbiage to use is, e.g.,
-   <tt>'the results may be saved using the PRINT_PARAM_STRING("output")
-   parameter'</tt>, and @b not <tt>'the results are returned through the
-   PRINT_PARAM_STRING("output") parameter'</tt>.
-
-Each of these macros (@c PRINT_PARAM_STRING(), @c PRINT_DATASET(),
-@c PRINT_MODEL(), and @c PRINT_CALL() ) provides different output depending on
-the language.  Below are some examples of documentation strings and their outputs
-for different languages.  Note that the output might not be *exactly* as written
-or formatted here, but the general gist should be the same.
-
-@code
-Input C++ (snippet):
-
-  "The parameter " + PRINT_PARAM_STRING("shuffle") + ", if set, will shuffle "
-  "the data before learning."
-
-Command-line program output (snippet):
-
-  The parameter '--shuffle', if set, will shuffle the data before learning.
-
-Python binding output (snippet):
-
-  The parameter 'shuffle', if set, will shuffle the data before learning.
-
-Julia binding output (snippet):
-
-  The parameter `shuffle`, if set, will shuffle the data before learning.
-
-Go binding output (snippet):
-
-  The parameter "Shuffle", if set, will shuffle the data before learning.
-@endcode
-
-@code
-Input C++ (snippet):
-
-  "The output matrix can be saved with the " + PRINT_PARAM_STRING("output") +
-  " output parameter."
-
-Command-line program output (snippet):
-
-  The output matrix can be saved with the '--output_file' output parameter.
-
-Python binding output (snippet):
-
-  The output matrix can be saved with the 'output' output parameter.
-
-Julia binding output (snippet):
-
-  The output matrix can be saved with the `output` output parameter.
-
-Go binding output (snippet):
-
-  The output matrix can be saved with the "output" output parameter.
-@endcode
-
-@code
-Input C++ (snippet):
-
-  "For example, to train a model on the dataset " + PRINT_DATASET("x") + " and "
-  "save the output model to " + PRINT_MODEL("model") + ", the following command"
-  " can be used:"
-  "\n\n" +
-  PRINT_CALL("program", "input", "x", "output_model", "model")
-
-Command-line program output (snippet):
-
-  For example, to train a model on the dataset 'x.csv' and save the output model
-  to 'model.bin', the following command can be used:
-
-  $ program --input_file x.csv --output_model_file model.bin
-
-Python binding output (snippet):
-
-  For example, to train a model on the dataset 'x' and save the output model to
-  'model', the following command can be used:
-
-  >>> output = program(input=x)
-  >>> model = output['output_model']
-
-Julia binding output (snippet):
-
-  For example, to train a model on the dataset `x` and save the output model to
-  `model`, the following command can be used:
-
-  julia> model = program(input=x)
-
-Go binding output (snippet):
-
-  For example, to train a model on the dataset "x" and save the output model to
-  "model", the following command can be used:
-
-    // Initialize optional parameters for Program().
-    param := mlpack.ProgramOptions()
-    param.Input = x
-
-    model := mlpack.Program(param)
-@endcode
-
-@code
-Input C++ (full program, 'random_numbers_main.cpp'):
-
-  // Program Name.
-  BINDING_NAME("Random Numbers");
-
-  // Short description.
-  BINDING_SHORT_DESC("An implementation of Random Numbers");
-
-  // Long description.
-  BINDING_LONG_DESC(
-      "This program generates random numbers with a "
-      "variety of nonsensical techniques and example parameters.  The input "
-      "dataset, which will be ignored, can be specified with the " +
-      PRINT_PARAM_STRING("input") + " parameter.  If you would like to subtract"
-      " values from each number, specify the " +
-      PRINT_PARAM_STRING("subtract") + " parameter.  The number of random "
-      "numbers to generate is specified with the " +
-      PRINT_PARAM_STRING("num_values") + " parameter."
-      "\n\n"
-      "The output random numbers can be saved with the " +
-      PRINT_PARAM_STRING("output") + " output parameter.  In addition, a "
-      "randomly generated linear regression model can be saved with the " +
-      PRINT_PARAM_STRING("output_model") + " output parameter.");
-
-  // Example.
-  BINDING_EXAMPLE(
-      "For example, to generate 100 random numbers with 3 subtracted from them "
-      "and save the output to " + PRINT_DATASET("rand") + " and the random "
-      "model to " + PRINT_MODEL("rand_lr") + ", use the following "
-      "command:"
-      "\n\n" +
-      PRINT_CALL("random_numbers", "num_values", 100, "subtract", 3, "output",
-          "rand", "output_model", "rand_lr"));
-
-Command line output:
-
-    Random Numbers
-
-    This program generates random numbers with a variety of nonsensical
-    techniques and example parameters.  The input dataset, which will be
-    ignored, can be specified with the '--input_file' parameter.  If you would
-    like to subtract values from each number, specify the '--subtract'
-    parameter.  The number of random numbers to generate is specified with the
-    '--num_values' parameter.
-
-    The output random numbers can be saved with the '--output_file' output
-    parameter.  In addition, a randomly generated linear regression model can be
-    saved with the '--output_model_file' output parameter.
-
-    For example, to generate 100 random numbers with 3 subtracted from them and
-    save the output to 'rand.csv' and the random model to 'rand_lr.bin', use the
-    following command:
-
-    $ random_numbers --num_values 100 --subtract 3 --output_file rand.csv
-      --output_model_file rand_lr.bin
-
-Python binding output:
-
-    Random Numbers
-
-    This program generates random numbers with a variety of nonsensical
-    techniques and example parameters.  The input dataset, which will be
-    ignored, can be specified with the 'input' parameter.  If you would like to
-    subtract values from each number, specify the 'subtract' parameter.  The
-    number of random numbers to generate is specified with the 'num_values'
-    parameter.
-
-    The output random numbers can be saved with the 'output' output parameter.
-    In addition, a randomly generated linear regression model can be saved with
-    the 'output_model' output parameter.
-
-    For example, to generate 100 random numbers with 3 subtracted from them and
-    save the output to 'rand' and the random model to 'rand_lr', use the
-    following command:
-
-    >>> output = random_numbers(num_values=100, subtract=3)
-    >>> rand = output['output']
-    >>> rand_lr = output['output_model']
-
-Julia binding output:
-
-    Random Numbers
-
-    This program generates random numbers with a variety of nonsensical
-    techniques and example parameters.  The input dataset, which will be
-    ignored, can be specified with the `input` parameter.  If you would like to
-    subtract values from each number, specify the `subtract` parameter.  The
-    number of random numbers to generate is specified with the `num_values`
-    parameter.
-
-    The output random numbers can be saved with the `output` output parameter.
-    In addition, a randomly generated linear regression model can be saved with
-    the `output_model` output parameter.
-
-    For example, to generate 100 random numbers with 3 subtracted from them and
-    save the output to `rand` and the random model to `rand_lr`, use the
-    following command:
-
-    ```julia
-    julia> rand, rand_lr = random_numbers(num_values=100, subtract=3)
-    ```
-
-Go binding output:
-
-    Random Numbers
-
-    This program generates random numbers with a variety of nonsensical
-    techniques and example parameters.  The input dataset, which will be
-    ignored, can be specified with the "Input" parameter.  If you would like to
-    subtract values from each number, specify the "Subtract" parameter.  The
-    number of random numbers to generate is specified with the "NumValues"
-    parameter.
-
-    The output random numbers can be saved with the "output" output parameter.
-    In addition, a randomly generated linear regression model can be saved with
-    the "outputModel" output parameter.
-
-    For example, to generate 100 random numbers with 3 subtracted from them and
-    save the output to "rand" and the random model to "randLr", use the
-    following command:
-
-    // Initialize optional parameters for RandomNumbers().
-    param := mlpack.RandomNumbersOptions()
-    param.NumValues = 100
-    param.Subtract=3
-
-    rand, randLr := mlpack.RandomNumbers(param)
-@endcode
-
-@subsection bindings_general_define_params Defining parameters for a program
-
-There exist several macros that can be used after a @c BINDING_LONG_DESC() and
-@c BINDING_EXAMPLE() definition to define the parameters that can be specified
-for a given mlpack program. These macros all have the same general definition:
-the name of the macro specifies the type of the parameter, whether or not the
-parameter is required, and whether the parameter is an input or output parameter.
-The arguments to the macros are the name, the description, and sometimes the
-single-character alias and the default value of the parameter.
-
-To give a flavor of how these definitions look, the definition
-
-@code
-PARAM_STRING_IN("algorithm", "The algorithm to use: 'svd' or 'blah'.", "a");
-@endcode
-
-will define a string input parameter @c algorithm (referenced as
-@c '\--algorithm' from the command-line or @c 'algorithm' from Python) with the
-description <tt>The algorithm to use: 'svd' or 'blah'.</tt>  The
-single-character alias @c '-a' can be used from a command-line program (but
-means nothing in Python).
-
-There are numerous different macros that can be used:
-
- - @c PARAM_FLAG() - boolean flag parameter
- - @c PARAM_INT_IN() - integer input parameter
- - @c PARAM_INT_OUT() - integer output parameter
- - @c PARAM_DOUBLE_IN() - double input parameter
- - @c PARAM_DOUBLE_OUT() - double output parameter
- - @c PARAM_STRING_IN() - string input parameter
- - @c PARAM_STRING_OUT() - string output parameter
- - @c PARAM_MATRIX_IN() - double-valued matrix (<tt>arma::mat</tt>) input
-       parameter
- - @c PARAM_MATRIX_OUT() - double-valued matrix (<tt>arma::mat</tt>) output
-       parameter
- - @c PARAM_UMATRIX_IN() - size_t-valued matrix (<tt>arma::Mat<size_t></tt>)
-       input parameter
- - @c PARAM_UMATRIX_OUT() - size_t-valued matrix (<tt>arma::Mat<size_t></tt>)
-       output parameter
- - @c PARAM_TMATRIX_IN() - transposed double-valued matrix (<tt>arma::mat</tt>)
-       input parameter
- - @c PARAM_TMATRIX_OUT() - transposed double-valued matrix (<tt>arma::mat</tt>)
-       output parameter
- - @c PARAM_MATRIX_AND_INFO_IN() - matrix with categoricals input parameter
-       (<tt>std::tuple<data::DatasetInfo, arma::mat></tt>)
- - @c PARAM_COL_IN() - double-valued column vector (<tt>arma::vec</tt>) input
-       parameter
- - @c PARAM_COL_OUT() - double-valued column vector (<tt>arma::vec</tt>) output
-       parameter
- - @c PARAM_UCOL_IN() - size_t-valued column vector (<tt>arma::Col<size_t></tt>)
-       input parameter
- - @c PARAM_UCOL_OUT() - size_t-valued column vector
-       (<tt>arma::Col<size_t></tt>) output parameter
- - @c PARAM_ROW_IN() - double-valued row vector (<tt>arma::rowvec</tt>) input
-       parameter
- - @c PARAM_ROW_OUT() - double-valued row vector (<tt>arma::rowvec</tt>) output
-       parameter
- - @c PARAM_VECTOR_IN() - <tt>std::vector</tt> input parameter
- - @c PARAM_VECTOR_OUT() - <tt>std::vector</tt> output parameter
- - @c PARAM_MODEL_IN() - serializable model input parameter
- - @c PARAM_MODEL_OUT() - serializable model output parameter
-
-And for input parameters, the parameter may also be required:
-
- - @c PARAM_INT_IN_REQ()
- - @c PARAM_DOUBLE_IN_REQ()
- - @c PARAM_STRING_IN_REQ()
- - @c PARAM_MATRIX_IN_REQ()
- - @c PARAM_UMATRIX_IN_REQ()
- - @c PARAM_TMATRIX_IN_REQ()
- - @c PARAM_VECTOR_IN_REQ()
- - @c PARAM_MODEL_IN_REQ()
-
-Click the links for each macro to read further documentation.  Note also that
-not every possible combination of @c IN, @c OUT, and @c REQ is
-available---output options cannot be required, and some combinations simply
-have not been added because they have not been needed.
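-
-For instance, a required double input parameter could be defined as follows (a
-hedged example; the name and description are made up).  Note that the required
-versions of the macros take no default value:
-
-@code
-PARAM_DOUBLE_IN_REQ("radius", "Radius to use for the search.", "r");
-@endcode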
-
-The @c PARAM_MODEL_IN() and @c PARAM_MODEL_OUT() macros are used to serialize
-mlpack models.  These could be used, for instance, to allow the user to save a
-trained model (like a linear regression model) or load an input model.  The
-first parameter to the @c PARAM_MODEL_IN() or @c PARAM_MODEL_OUT() macro should
-be the C++ type of the model to be serialized; this type @b must have a function
-<tt>template<typename Archive> void Serialize(Archive&, const unsigned int)</tt>
-(i.e. the type must be serializable via mlpack's boost::serialization shim).
-For example, to allow a user to specify an input model of type
-`LinearRegression`, the following definition could be used:
-
-@code
-PARAM_MODEL_IN(LinearRegression, "input_model", "The input model to be used.",
-    "i");
-@endcode
-
-Then, the user will be able to specify their model from the command-line as
-@c \--input_model_file and from Python using the @c input_model option to the
-generated binding.
-
-From the command line, matrix-type and model-type options (both input and
-output) are loaded from or saved to the specified file.  This means that
-@c _file is appended to the name of the parameter; so if the parameter name is
-@c data and it is of a matrix or model type, then the name that the user will
-specify on the command line will be @c \--data_file.  This displayed parameter
-name change @b only occurs with matrix and model type parameters for
-command-line programs.
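-
-For instance, a hypothetical matrix parameter named @c data (the name is made
-up for illustration) would be specified differently from each interface:
-
-@code
-// In the binding source:
-PARAM_MATRIX_IN("data", "The input data matrix.", "d");
-
-// From the command line, the user passes a filename:
-//   $ program --data_file data.csv
-// From Python, the user passes the matrix itself:
-//   >>> program(data=x)
-@endcode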
-
-The @c PARAM_MATRIX_AND_INFO_IN() macro defines a categorical matrix parameter
-(more specifically, a matrix type that can support categorical columns).  From
-the C++ program side, this means that the parameter type is
-<tt>std::tuple<data::DatasetInfo, arma::mat></tt>.  From the user side, for a
-command-line program, this means that the user will pass the filename of a
-dataset that can have categorical features, such as an ARFF dataset.  For a
-Python program, the user may pass a Pandas matrix with categorical columns.
-When the program is run, the input that the user gives will be processed and the
-@c data::DatasetInfo object will be filled with the dimension types and the
-@c arma::mat object will be filled with the data itself.
-
-To give some examples, the parameter definitions from the example
-"random_numbers" program in the previous section are shown below.
-
-@code
-PARAM_MATRIX_IN("input", "The input matrix that will be ignored.", "i");
-PARAM_DOUBLE_IN("subtract", "The value to subtract from each parameter.", "s",
-    0.0); // Default value of 0.0.
-PARAM_INT_IN("num_samples", "The number of samples to generate.", "n", 100);
-
-PARAM_MATRIX_OUT("output", "The output matrix of random samples.", "o");
-PARAM_MODEL_OUT(LinearRegression, "output_model", "The randomly generated "
-    "linear regression output model.", "M");
-@endcode
-
-Note that even the parameter documentation strings must be a little bit
-agnostic to the binding type, because the command-line interface presented to
-the user is so different from the Python interface.
-
-@subsection bindings_general_functions Using IO in an mlpackMain() function
-
-mlpack's @c IO module provides a unified abstract interface for getting input
-from and providing output to users without needing to consider the language
-(command-line, Python, MATLAB, etc.) that the user is running the program from.
-This means that after the @c BINDING_LONG_DESC() and @c BINDING_EXAMPLE() macros
-and the @c PARAM_*() macros have been defined, a language-agnostic
-@c mlpackMain() function can be written.  This function can then perform the
-actual computation that the entire program is meant to perform.
-
-Inside of an @c mlpackMain() function, the @c mlpack::IO module can be used to
-access input parameters and set output parameters.  There are two main functions
-for this, plus a utility printing function:
-
- - @c IO::GetParam<T>() - get a reference to a parameter
- - @c IO::HasParam() - returns true if the user specified the parameter
- - @c IO::GetPrintableParam<T>() - returns a string representing the value of
-      the parameter
-
-So, to print "hello" if the user specified the @c print_hello parameter, the
-following code could be used:
-
-@code
-using namespace mlpack;
-
-if (IO::HasParam("print_hello"))
-  std::cout << "Hello!" << std::endl;
-else
-  std::cout << "No greetings for you!" << std::endl;
-@endcode
-
-To access a string that a user passed in to the @c string parameter, the
-following code could be used:
-
-@code
-using namespace mlpack;
-
-const std::string& str = IO::GetParam<std::string>("string");
-@endcode
-
-Matrix types are accessed in the same way:
-
-@code
-using namespace mlpack;
-
-arma::mat& matrix = IO::GetParam<arma::mat>("matrix");
-@endcode
-
-Similarly, model types can be accessed.  If a @c LinearRegression model was
-specified by the user as the parameter @c model, the following code can access
-the model:
-
-@code
-using namespace mlpack;
-
-LinearRegression& lr = IO::GetParam<LinearRegression>("model");
-@endcode
-
-Matrices with categoricals are a little trickier to access since the C++
-parameter type is <tt>std::tuple<data::DatasetInfo, arma::mat></tt>.  The
-example below creates references to both the @c DatasetInfo and matrix objects,
-assuming the user has passed a matrix with categoricals as the @c matrix
-parameter.
-
-@code
-using namespace mlpack;
-
-typedef std::tuple<data::DatasetInfo, arma::mat> TupleType;
-data::DatasetInfo& di = std::get<0>(IO::GetParam<TupleType>("matrix"));
-arma::mat& matrix = std::get<1>(IO::GetParam<TupleType>("matrix"));
-@endcode
-
-These two functions can be used to write an entire program.  The third function,
-@c GetPrintableParam(), can be used to help provide useful output in a program.
-Typically, this function should be used if you want to provide some kind of
-error message about a matrix or model parameter, but want to avoid printing the
-matrix itself.  For instance, printing a matrix parameter with
-@c GetPrintableParam() will print the filename for a command-line binding or the
-size of a matrix for a Python binding.  @c GetPrintableParam() for a model
-parameter will print the filename for the model for a command-line binding or
-a simple string representing the type of the model for a Python binding.
-
-Putting all of these ideas together, here is the @c mlpackMain() function that
-could be created for the "random_numbers" program from earlier sections.
-
-@code
-#include <mlpack/core/util/mlpack_main.hpp>
-
-// BINDING_NAME(), BINDING_SHORT_DESC(), BINDING_LONG_DESC() , BINDING_EXAMPLE(),
-// BINDING_SEE_ALSO() and PARAM_*() definitions should go here:
-// ...
-
-using namespace mlpack;
-
-void mlpackMain()
-{
-  // If the user passed an input matrix, tell them that we'll be ignoring it.
-  if (IO::HasParam("input"))
-  {
-    // Print the filename the user passed, if a command-line binding, or the
-    // size of the matrix passed, if a Python binding.
-    Log::Warn << "The input matrix "
-        << IO::GetPrintableParam<arma::mat>("input") << " is ignored!"
-        << std::endl;
-  }
-
-  // Get the number of samples and also the value we should subtract.
-  const size_t numSamples = (size_t) IO::GetParam<int>("num_values");
-  const double subtractValue = IO::GetParam<double>("subtract");
-
-  // Create the random matrix (1-dimensional).
-  arma::mat output(1, numSamples, arma::fill::randu);
-  output -= subtractValue;
-
-  // Save the output matrix if the user wants.
-  if (IO::HasParam("output"))
-    IO::GetParam<arma::mat>("output") = std::move(output); // Avoid copy.
-
-  // Did the user request a random linear regression model?
-  if (IO::HasParam("output_model"))
-  {
-    LinearRegression lr;
-    lr.Parameters().randu(10); // 10-dimensional (arbitrary).
-    lr.Lambda() = 0.0;
-    lr.Intercept() = false; // No intercept term.
-
-    IO::GetParam<LinearRegression>("output_model") = std::move(lr);
-  }
-}
-@endcode
-
-@subsection bindings_general_more More documentation on using IO
-
-More documentation for the IO module can be found either on the mlpack::IO
-documentation page or by reading the existing mlpack bindings.  These can be
-found in the @c src/mlpack/methods/ folders, by finding the @c _main.cpp files.
-For instance, @c src/mlpack/methods/neighbor_search/knn_main.cpp is the
-k-nearest-neighbor search program definition.
-
-@section bindings_structure Structure of IO module and associated macros
-
-This section describes the internal functionality of the IO module and the
-associated macros.  If you are only interested in writing mlpack programs, this
-section is probably not worth reading.
-
-There are eight main components involved with mlpack bindings:
-
- - the IO module, a singleton class that stores parameter information
- - the mlpackMain() function that defines the functionality of the binding
- - the BINDING_NAME() macro that defines the binding name
- - the BINDING_SHORT_DESC() macro that defines the short description
- - the BINDING_LONG_DESC() macro that defines the long description
- - (optional) the BINDING_EXAMPLE() macro that defines example usages
- - (optional) the BINDING_SEE_ALSO() macro that defines "see also" links
- - the PARAM_*() macros that define parameters for the binding
-
-The mlpack::IO module is a singleton class that stores, at runtime, the binding
-name, the documentation, and the parameter information and values.  In order to
-do this, each parameter and the program documentation must make themselves known
-to the IO singleton.  This is accomplished by having the @c BINDING_NAME(),
-@c BINDING_SHORT_DESC(), @c BINDING_LONG_DESC(), @c BINDING_EXAMPLE(),
-@c BINDING_SEE_ALSO() and @c PARAM_*() macros declare global variables that,
-in their constructors, register themselves with the IO singleton.
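-
-For example, @c BINDING_NAME("name") might expand to a global declaration along
-the lines of the following sketch (the variable name here is hypothetical, and
-the real macro is more involved):
-
-@code
-// Hypothetical expansion; the constructor registers the name with IO.
-static mlpack::util::ProgramName io_programname_object("name");
-@endcode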
-
-The @c BINDING_NAME() macro declares an object of type mlpack::util::ProgramName.
-The @c BINDING_SHORT_DESC() macro declares an object of type
-mlpack::util::ShortDescription.
-The @c BINDING_LONG_DESC() macro declares an object of type
-mlpack::util::LongDescription.
-The @c BINDING_EXAMPLE() macro declares an object of type mlpack::util::Example.
-The @c BINDING_SEE_ALSO() macro declares an object of type
-mlpack::util::SeeAlso.
-The @c ProgramName class constructor calls IO::RegisterProgramName() in order
-to register the given program name.  The @c ShortDescription class constructor
-calls IO::RegisterShortDescription() in order to register the given short
-description.  The @c LongDescription class constructor calls
-IO::RegisterLongDescription() in order to register the given long description.
-The @c Example class constructor calls IO::RegisterExample() in order to
-register the given example.  The @c SeeAlso class constructor calls
-IO::RegisterSeeAlso() in order to register the given see-also link.
-
-The @c PARAM_*() macros declare an object that will, in its constructor, call
-IO::Add() to register that parameter with the IO singleton.  The specific type
-of that object will depend on the binding type being used.
-
-The IO::Add() function takes an mlpack::util::ParamData object as its input.
-This @c ParamData object has a number of fields that must be set to properly
-describe the parameter.  Each of the fields is documented and probably
-self-explanatory, but three fields deserve further explanation:
-
- - the <tt>std::string tname</tt> member is used to encode the true type of
-   the parameter---which is not known by the IO singleton at runtime.  This
-   should be set to <tt>TYPENAME(T)</tt> where @c T is the type of the
-   parameter.
-
- - the <tt>boost::any value</tt> member is used to hold the actual value of the
-   parameter.  Typically this will simply be the parameter held by a
-   @c boost::any object, but for some types it may be more complex.  For
-   instance, for a command-line matrix option, the @c value parameter will
-   actually hold a tuple containing both the filename and the matrix itself.
-
- - the <tt>std::string cppType</tt> should be a string containing the type as
-   seen in C++ code.  Typically this can be encoded by stringifying a
-   @c PARAM_*() macro argument.
-
-Thus, the global object defined by the @c PARAM_*() macro must turn its
-arguments into a fully specified @c ParamData object and then call IO::Add()
-with it.
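-
-As a simplified, hypothetical sketch (the @c name and @c desc fields are shown
-illustratively; only @c tname, @c value, and @c cppType are described above),
-registering a double parameter might look like this:
-
-@code
-mlpack::util::ParamData d;
-d.name = "subtract";            // The parameter's name.
-d.desc = "Value to subtract.";  // The parameter's description.
-d.tname = TYPENAME(double);     // Encodes the true type of the parameter.
-d.cppType = "double";           // The type as written in C++ code.
-d.value = boost::any(0.0);      // Holds the (default) value.
-IO::Add(std::move(d));
-@endcode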
-
-With different binding types, different behavior is often required for the
-@c GetParam<T>(), @c HasParam(), and @c GetPrintableParam<T>() functions.  In
-order to handle this, the IO singleton also holds a function pointer map, so
-that a given type of option can call specific functionality for a certain task.
-This function map is accessible as @c IO::functionMap and is not meant to be
-used by users, but instead by people writing binding types.
-
-Each function in the map must have signature
-
-@code
-void MapFunction(const util::ParamData& d,
-                 const void* input,
-                 void* output);
-@endcode
-
-The use of void pointers allows any type to be specified as input or output to
-the function without changing the signature for the map.  The IO function map
-is of type
-
-@code
-std::map<std::string, std::map<std::string,
-    void (*)(const util::ParamData&, const void*, void*)>>
-@endcode
-
-and the first map key is the typename (<tt>tname</tt>) of the parameter, and the
-second map key is the string name of the function.  For instance, calling
-
-@code
-const util::ParamData& d = IO::Parameters()["param"];
-IO::GetSingleton().functionMap[d.tname]["GetParam"](d, input, output);
-@endcode
-
-will call the @c GetParam() function for the type of the @c "param" parameter.
-Examples are probably the easiest way to understand how this functionality
-works; see the IO::GetParam<T>() source to see how it is used in practice.
-
-The IO singleton expects the following functions to be defined in the function
-map for each type:
-
- - @c GetParam -- return a pointer to the parameter in @c output.
- - @c GetPrintableParam -- return a pointer to a string description of the
-       parameter in @c output.
-
-If these functions are properly defined, then the IO module will work
-correctly.  Other functions may also be defined; these may be used by other
-parts of the binding infrastructure for different languages.
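-
-As a hedged sketch, a @c GetParam implementation for a simple (non-matrix,
-non-model) type might look like the following:
-
-@code
-template<typename T>
-void GetParam(const util::ParamData& d,
-              const void* /* input (unused) */,
-              void* output)
-{
-  // The parameter is held directly in the boost::any; return its address.
-  *((T**) output) = const_cast<T*>(boost::any_cast<T>(&d.value));
-}
-@endcode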
-
-@section bindings_cli Command-line program bindings
-
-This section describes the internal functionality of the command-line program
-binding generator.  If you are only interested in writing mlpack programs, this
-section probably is not worth reading.  This section is worth reading only if
-you want to know the specifics of how the @c mlpackMain() function and macros
-get turned into a fully working command-line program.
-
-The code for the command-line bindings is found in @c src/mlpack/bindings/cli.
-
-@subsection bindings_cli_mlpack_main mlpackMain() definition
-
-Any command-line program must be compiled with the @c BINDING_TYPE macro
-set to the value @c BINDING_TYPE_CLI.  This is handled by the CMake macro
-@c add_cli_executable().
-
-When @c BINDING_TYPE is set to @c BINDING_TYPE_CLI, the following is set in
-@c src/mlpack/core/util/mlpack_main.hpp, which must be included by every mlpack
-binding:
-
- - The options defined by @c PARAM_*() macros are of type
-   mlpack::bindings::cli::CLIOption.
-
- - The parameter and value printing macros for @c BINDING_LONG_DESC()
-   and BINDING_EXAMPLE() are set:
-   * The @c PRINT_PARAM_STRING() macro is defined as
-     mlpack::bindings::cli::ParamString().
-   * The @c PRINT_DATASET() macro is defined as
-     mlpack::bindings::cli::PrintDataset().
-   * The @c PRINT_MODEL() macro is defined as
-     mlpack::bindings::cli::PrintModel().
-   * The @c PRINT_CALL() macro is defined as
-     mlpack::bindings::cli::ProgramCall().
-
- - The function <tt>int main()</tt> is defined as:
-
-@code
-int main(int argc, char** argv)
-{
-  // Parse the command-line options; put them into IO.
-  mlpack::bindings::cli::ParseCommandLine(argc, argv);
-
-  mlpackMain();
-
-  // Print output options, print verbose information, save model parameters,
-  // clean up, and so forth.
-  mlpack::bindings::cli::EndProgram();
-}
-@endcode
-
-Thus any mlpack command-line binding first processes the command-line arguments
-with mlpack::bindings::cli::ParseCommandLine(), then runs the binding with
-@c mlpackMain(), then cleans up with mlpack::bindings::cli::EndProgram().
-
-The @c ParseCommandLine() function reads the input parameters and sets the
-values in IO.  For matrix-type and model-type parameters, this reads the
-filenames from the command line, but does not load the matrix or model.
-Instead, the matrix or model is loaded the first time it is accessed with
-@c GetParam<T>().
-
-The @c \--help parameter is handled by the mlpack::bindings::cli::PrintHelp()
-function.
-
-At the end of program execution, the mlpack::bindings::cli::EndProgram()
-function is called.  This writes any output matrix or model parameters to disk,
-and prints the program parameters and timers if @c \--verbose was given.
-
-@subsection bindings_cli_matrix Matrix and model parameter handling
-
-For command line bindings, the matrix, model, and matrix with categorical type
-parameters all require special handling, since it is not possible to pass a
-matrix of any reasonable size or a model on the command line directly.
-Therefore for a matrix or model parameter, the user specifies the file
-containing that matrix or model parameter.  If the parameter is an input
-parameter, then the file is loaded when @c GetParam<T>() is called.  If the
-parameter is an output parameter, then the matrix or model is saved to the file
-when @c EndProgram() is called.
-
-The actual implementation of this is that the <tt>boost::any value</tt> member
-of the @c ParamData struct does not hold the model or the matrix, but instead a
-<tt>std::tuple</tt> containing both the matrix or the model, and the filename
-associated with that matrix or model.
-
-This means that functions like @c GetParam<T>() and @c GetPrintableParam<T>()
-(and all of the other associated functions in the IO function map) must have
-special handling for matrix or model types.  See those implementations for more
-details---the special handling is enforced via SFINAE.
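-
-As an illustrative sketch (simplified from the real implementation), extracting
-the matrix and its filename from such a parameter might look like:
-
-@code
-// 'value' is the boost::any member of a (non-const) ParamData object.
-typedef std::tuple<arma::mat, std::string> TupleType;
-TupleType& t = *boost::any_cast<TupleType>(&value);
-arma::mat& matrix = std::get<0>(t);            // The matrix itself.
-const std::string& filename = std::get<1>(t);  // The associated filename.
-@endcode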
-
-@subsection bindings_cli_parsing Parsing the command line
-
-The @c ParseCommandLine() function uses <tt>CLI11</tt> to read
-the values from the command line into the @c ParamData structs held by the IO
-singleton.
-
-In order to set up <tt>CLI11</tt>---and to keep its headers
-from needing to be included by the rest of the library---the code loops over
-each parameter known by the IO singleton and calls the @c "AddToPO" function
-from the function map.  This in turn calls the necessary functions to register a
-given parameter with <tt>CLI11</tt>, and once all parameters
-have been registered, the facilities provided by <tt>CLI11</tt>
-are used to parse the command line input properly.
-
-@section bindings_python Python bindings
-
-This section describes the internal functionality of the mlpack Python binding
-generator.  If you are only interested in writing new bindings or building the
-bindings, this section is probably not worth reading.  But if you are interested
-in the internal working of the Python binding generator, then this section is
-for you.
-
-The Python bindings are significantly more complex than the command line
-bindings because we cannot just compile directly to a finished product.  Instead
-we need a multi-stage compilation:
-
- - We must generate a setup.py file that can be used to compile the bindings.
- - We must generate the .pyx (Cython) bindings for each program.
- - Then we must build each .pyx into a .so that is loadable from Python.
- - We must also test the Python bindings.
-
-This is done with a combination of C++ code to generate the .pyx bindings, CMake
-to run the actual compilation and generate the setup.py file, some utility
-Python functions, and tests written in both Python and C++.  This code is
-primarily contained in @c src/mlpack/bindings/python/.
-
-@subsection bindings_python_matrix Passing matrices to/from Python
-
-The standard Python matrix library is numpy, so mlpack bindings should accept
-numpy matrices as input.  Fortunately, numpy Cython bindings already exist,
-which make it easy to convert from a numpy object to an Armadillo object without
-copying any data.  This code can be found in
-@c src/mlpack/bindings/python/mlpack/arma_numpy.pyx, and is used by the Python
-@c GetParam<T>() functionality.
-
-mlpack also supports categorical matrices; in Python, the typical way of
-representing matrices with categorical features is with Pandas.  Therefore,
-mlpack also accepts Pandas matrices, and if any of the Pandas matrix dimensions
-are categorical, these are properly encoded.  The function
-@c to_matrix_with_info() from @c mlpack/bindings/python/mlpack/matrix_utils.py
-is used to perform this conversion.
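-
-For example, from Python (a sketch; the @c pca binding and its parameter names
-are used purely for illustration):
-
-@code
->>> import pandas as pd
->>> from mlpack import pca
->>> df = pd.read_csv('covertype-small.data.csv')
->>> result = pca(input=df, new_dimensionality=2)
->>> transformed = result['output']  # A numpy matrix.
-@endcode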
-
-@subsection bindings_python_model Passing model parameter to/from Python
-
-We use (or abuse) Cython functionality in order to give the user a model object
-that they can use in their Python code.  However, we do not want to (or have the
-infrastructure to) write bindings for every method that a serializable model
-class might support; therefore, we only desire to return a memory pointer to the
-model to the user.
-
-In this way, a user that receives a model from an output parameter can then
-reuse the model as an input parameter to another binding (or the same binding).
-
-To return a model pointer, we have to define a Cython class in the following
-way (this example is taken from the perceptron binding):
-
-@code
-cdef extern from "</home/ryan/src/mlpack-rc/src/mlpack/methods/perceptron/perceptron_main.cpp>" nogil:
-  cdef int mlpackMain() nogil except +RuntimeError
-
-  cdef cppclass PerceptronModel:
-    PerceptronModel() nogil
-
-
-cdef class PerceptronModelType:
-  cdef PerceptronModel* modelptr
-
-  def __cinit__(self):
-    self.modelptr = new PerceptronModel()
-
-  def __dealloc__(self):
-    del self.modelptr
-@endcode
-
-This class definition is generated automatically as part of the .pyx file
-generation.
-
-@subsection bindings_python_setup_py CMake generation of setup.py
-
-A boilerplate setup.py file can be found in
-@c src/mlpack/bindings/python/setup.py.in.  This will be configured by CMake to
-produce the final @c setup.py file, but in order to do this, a list of the .pyx
-files to be compiled must be gathered.
-
-Therefore, the @c add_python_binding() macro is defined in
-@c src/mlpack/bindings/python/CMakeLists.txt.  This adds the given binding to
-the @c MLPACK_PYXS variable, which is then inserted into @c setup.py as part of
-the @c configure_file() step in @c src/mlpack/CMakeLists.txt.
-
-@subsection bindings_python_generate_pyx Generation of .pyx files
-
-A binding named @c program is built into a program called
-@c generate_pyx_program (this is a CMake target, so you can build these
-individually if you like).  The file
-@c src/mlpack/bindings/python/generate_pyx.cpp.in is configured by CMake to set
-the name of the program and the @c *_main.cpp file to include correctly, then
-the @c mlpack::bindings::python::PrintPYX() function is called by the program.
-The @c PrintPYX() function uses the parameters that have been set in the IO
-singleton by the @c BINDING_NAME(), @c BINDING_SHORT_DESC(),
-@c BINDING_LONG_DESC(), @c BINDING_EXAMPLE(), @c BINDING_SEE_ALSO() and
-@c PARAM_*() macros in order to actually print a fully-working .pyx file that
-can be compiled.  The file has several sections:
-
- - Python imports (numpy/pandas/cython/etc.)
- - Cython imports of C++ utility functions and Armadillo functionality
- - Cython imports of any necessary serializable model types
- - Definitions of classes for serializable model types
- - The binding function definition
- - Documentation: input and output parameters
- - The call to mlpackMain()
- - Handling of output functionality
- - Return of output parameters
-
-Any output parameters for Python bindings are returned in a dict containing
-named elements.
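-
-For instance (a sketch; the @c perceptron binding is used purely for
-illustration, with @c X and @c y assumed to be numpy arrays):
-
-@code
->>> from mlpack import perceptron
->>> d = perceptron(training=X, labels=y)
->>> model = d['output_model']  # Can be passed back in as input_model=model.
-@endcode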
-
-@subsection bindings_python_build_pyx Building the .pyx files
-
-After building the @c generate_pyx_program target, the @c build_pyx_program
-target is built as a dependency of the @c python target.  This simply takes the
-generated .pyx file and uses Python setuptools to compile it into a Python
-binding.
-
-@subsection bindings_python_testing Testing the Python bindings
-
-We cannot do our tests only from the Boost Unit Test Framework in C++ because we
-need to see that we are able to load parameters properly from Python and return
-output correctly.
-
-The tests are in @c src/mlpack/bindings/python/tests/ and test both the actual
-bindings and also the auxiliary Python code included in
-@c src/mlpack/bindings/python/mlpack/.
-
-@section bindings_new Adding new binding types
-
-Adding a new binding type to mlpack is fairly straightforward once the general
-structure of the IO singleton and the function map that IO uses is understood.
-For each different language that bindings are desired for, the route to a
-solution will differ significantly---so it is hard to provide general guidance
-for making new bindings that will apply to every language.
-
-In general, the first thing to handle will be how matrices are passed back and
-forth between mlpack and the target language.  Typically this means getting the
-memory address of an input matrix and wrapping an @c arma::mat object around
-that memory address.  This can be handled in the @c GetParam() function that is
-part of the IO singleton function map; see @c get_param.hpp for both the CLI
-and Python bindings for an example (in @c src/mlpack/bindings/cli/ and
-@c src/mlpack/bindings/python/).
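-
-For instance, a minimal sketch, assuming the target language hands us a raw
-pointer @c mem to column-major data along with its dimensions:
-
-@code
-// Wrap the existing memory without copying it; the target language retains
-// ownership of the buffer.
-arma::mat wrapped(mem, rows, cols, false /* copy_aux_mem */, true /* strict */);
-@endcode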
-
-Serialization of models is also a tricky consideration; in some languages you
-will be able to pass a pointer to the model itself.  This is generally
-best---users should not expect to be able to manipulate the model in the target
-language, but they should expect that they can pass a model back and forth
-without paying a runtime penalty.  So, for example, serializing a model using a
-@c boost::archive::text_oarchive and then returning the string that represents
-the model is not acceptable, because that string can be extremely large and
-decoding the model from it can take a long time.
-
-Generating a binding definition for the target language, as is done with
-Python, can be a useful approach to consider.  If this is the desired route, a
-large amount of CMake boilerplate may be necessary.  The Python CMake
-configuration can be referred to as an example, but significant adaptation for
-other languages will probably be necessary.
-
-Lastly, when adding a new language, be sure that it works with the Markdown
-documentation generator.  In order to make this happen, you will need
-to modify all of the @c add_markdown_docs() calls in the different
-@c CMakeLists.txt files to contain the name of the language you have written a
-binding for.  You will also need to modify every function in
-@c src/mlpack/bindings/markdown/print_doc_functions_impl.hpp to correctly call
-out to the corresponding function for the language that you have written
-bindings for.
-
-*/
diff -pruN 3.4.2-7/doc/guide/build.hpp 4.0.1-1/doc/guide/build.hpp
--- 3.4.2-7/doc/guide/build.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/build.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,287 +0,0 @@
-/*! @page build Building mlpack From Source
-
-@section build_buildintro Introduction
-
-This document discusses how to build mlpack from source.  These build
-directions will work for any Linux-like shell environment (for example, Ubuntu,
-macOS, or FreeBSD).  However, mlpack is in the repositories of many Linux
-distributions, so it may be easier to use your system's package manager.  For
-example, on Ubuntu, you can install mlpack with the following command:
-
-@code
-$ sudo apt-get install libmlpack-dev
-@endcode
-
-@note Older Ubuntu versions may not have the most recent version of mlpack
-available---for instance, at the time of this writing, Ubuntu 16.04 only has
-mlpack 2.0.1 available.  Options include upgrading Ubuntu to a newer release,
-finding a PPA or other non-official sources, or installing with a manual build
-(below).
-
-If mlpack is not available in your system's package manager, then you can follow
-this document for how to compile and install mlpack from source.
-
-mlpack uses CMake as a build system and allows several flexible build
-configuration options.  One can consult any of numerous CMake tutorials for
-further documentation, but this tutorial should be enough to get mlpack built
-and installed on most Linux and UNIX-like systems (including OS X).  If you want
-to build mlpack on Windows, see \ref build_windows (alternatively, you can read 
-<a href="https://keon.io/mlpack-on-windows/">Keon's excellent tutorial</a> which
-is based on older versions).
-
-You can download the latest mlpack release from here:
-<a href="https://www.mlpack.org/files/mlpack-3.4.2.tar.gz">mlpack-3.4.2</a>
-
-@section build_simple Simple Linux build instructions
-
-Assuming all dependencies are installed in the system, you can run the commands
-below directly to build and install mlpack.
-
-@code
-$ wget https://www.mlpack.org/files/mlpack-3.4.2.tar.gz
-$ tar -xvzpf mlpack-3.4.2.tar.gz
-$ mkdir mlpack-3.4.2/build && cd mlpack-3.4.2/build
-$ cmake ../
-$ make -j4  # The -j is the number of cores you want to use for a build.
-$ sudo make install
-@endcode
-
-If the \c cmake \c .. command fails, you are probably missing a dependency, so
-check the output and install any necessary libraries.  (See \ref build_dep.)
-
-On many Linux systems, mlpack will install by default to @c /usr/local/lib and
-you may need to set the @c LD_LIBRARY_PATH environment variable:
-
-@code
-export LD_LIBRARY_PATH=/usr/local/lib
-@endcode
-
-The instructions above are the simplest way to get, build, and install mlpack.
-The sections below discuss each of those steps in further detail and show how to
-configure mlpack.
-
-@section build_builddir Creating Build Directory
-
-First we should unpack the mlpack source and create a build directory.
-
-@code
-$ tar -xvzpf mlpack-3.4.2.tar.gz
-$ cd mlpack-3.4.2
-$ mkdir build
-@endcode
-
-The directory can have any name, not just 'build', but 'build' is sufficient.
-
-@section build_dep Dependencies of mlpack
-
-mlpack depends on the following libraries, which need to be installed on the
-system and have headers present:
-
- - Armadillo >= 8.400.0 (with LAPACK support)
- - Boost (math_c99, serialization, unit_test_framework, heap,
-          spirit) >= 1.58
- - ensmallen >= 2.10.0 (will be downloaded if not found)
-
-In addition, mlpack has the following optional dependencies:
-
- - STB: this will allow loading of images; the library is downloaded if not
-   found and the CMake variable DOWNLOAD_STB_IMAGE is set to ON (the default)
-
-For Python bindings, the following packages are required:
-
- - setuptools
- - cython >= 0.24
- - numpy
- - pandas >= 0.15.0
- - pytest-runner
-
-In Ubuntu (>= 18.04) and Debian (>= 10) all of these dependencies can be 
-installed through apt:
-
-@code
-# apt-get install libboost-math-dev libboost-test-dev \
-    libboost-serialization-dev libarmadillo-dev binutils-dev \
-    python3-pandas python3-numpy cython3 python3-setuptools
-@endcode
-
-If you are using Ubuntu 19.10 or newer, you can also install @c libensmallen-dev
-and @c libstb-dev, so that CMake does not need to automatically download those
-packages:
-
-@code
-# apt-get install libensmallen-dev libstb-dev
-@endcode
-
-@note For older versions of Ubuntu and Debian, Armadillo needs to be built from
-source, as apt installs an older version.  So you need to omit
-\c libarmadillo-dev from the code snippet above and instead use
-<a href="http://arma.sourceforge.net/download.html">this link</a> to download
-the required file.  Extract this file and follow the README in the uncompressed
-folder to build and install Armadillo.
-
-On Fedora, Red Hat, or CentOS, these same dependencies can be obtained via dnf:
-
-@code
-# dnf install boost-devel boost-test boost-math armadillo-devel \
-    binutils-devel python3-Cython python3-setuptools python3-numpy \
-    python3-pandas ensmallen-devel stbi-devel
-@endcode
-
-(It's also possible to use python3 packages from the package manager---mlpack
-will work with either.  Also, the ensmallen-devel package is only available in
-Fedora 29 or RHEL 7 or newer.)
-
-@section build_config Configuring CMake
-
-Running CMake is the equivalent of running `./configure` with autotools.  If you
-run CMake with no options, it will configure the project to build without
-debugging or profiling information (for speed).
-
-@code
-$ cd build
-$ cmake ../
-@endcode
-
-You can manually specify options to compile with debugging information and
-profiling information (useful if you are developing mlpack):
-
-@code
-$ cd build
-$ cmake -D DEBUG=ON -D PROFILE=ON ../
-@endcode
-
-The full list of options mlpack allows:
-
- - DEBUG=(ON/OFF): compile with debugging symbols (default OFF)
- - PROFILE=(ON/OFF): compile with profiling symbols (default OFF)
- - ARMA_EXTRA_DEBUG=(ON/OFF): compile with extra Armadillo debugging symbols
-       (default OFF)
- - BUILD_TESTS=(ON/OFF): compile the \c mlpack_test program (default ON)
- - BUILD_CLI_EXECUTABLES=(ON/OFF): compile the mlpack command-line executables
-       (i.e. \c mlpack_knn, \c mlpack_kfn, \c mlpack_logistic_regression, etc.)
-       (default ON)
- - BUILD_PYTHON_BINDINGS=(ON/OFF): compile the bindings for Python, if the
-       necessary Python libraries are available (default ON except on Windows)
- - BUILD_JULIA_BINDINGS=(ON/OFF): compile Julia bindings, if Julia is found
-       (default ON)
- - BUILD_SHARED_LIBS=(ON/OFF): compile shared libraries as opposed to
-       static libraries (default ON)
- - TEST_VERBOSE=(ON/OFF): run test cases in \c mlpack_test with verbose output
-       (default OFF)
- - DISABLE_DOWNLOADS=(ON/OFF): Disable downloads of dependencies during build
-       (default OFF)
- - DOWNLOAD_ENSMALLEN=(ON/OFF): If ensmallen is not found, download it
-       (default ON)
- - DOWNLOAD_STB_IMAGE=(ON/OFF): If STB is not found, download it (default ON)
- - BUILD_WITH_COVERAGE=(ON/OFF): Build with support for code coverage tools
-      (gcc only) (default OFF)
- - PYTHON_EXECUTABLE=(/path/to/python_version): Path to specific Python executable
- - JULIA_EXECUTABLE=(/path/to/julia): Path to specific Julia executable
- - BUILD_MARKDOWN_BINDINGS=(ON/OFF): Build Markdown bindings for website
-       documentation (default OFF)
- - MATHJAX=(ON/OFF): use MathJax for generated Doxygen documentation (default
-       OFF)
- - FORCE_CXX11=(ON/OFF): assume that the compiler supports C++11 instead of
-       checking; be sure to specify any necessary flag to enable C++11 as part
-       of CXXFLAGS (default OFF)
- - USE_OPENMP=(ON/OFF): if ON, then use OpenMP if the compiler supports it; if
-       OFF, OpenMP support is manually disabled (default ON)
-
-Each option can be specified to CMake with the '-D' flag.  Other tools can also
-be used to configure CMake, but those are not documented here.
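-
-For example, to build static libraries and skip building the command-line
-executables:
-
-@code
-$ cmake -D BUILD_SHARED_LIBS=OFF -D BUILD_CLI_EXECUTABLES=OFF ../
-@endcode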
-
-In addition, the following directories may be specified, to find include files
-and libraries. These also use the '-D' flag.
-
- - ARMADILLO_INCLUDE_DIR=(/path/to/armadillo/include/): path to Armadillo headers
- - ARMADILLO_LIBRARY=(/path/to/armadillo/libarmadillo.so): location of Armadillo
-       library
- - BOOST_ROOT=(/path/to/boost/): path to root of boost installation
- - ENSMALLEN_INCLUDE_DIR=(/path/to/ensmallen/include): path to include directory
-       for ensmallen
- - STB_IMAGE_INCLUDE_DIR=(/path/to/stb/include): path to include directory for
-      STB image library
- - MATHJAX_ROOT=(/path/to/mathjax): path to root of MathJax installation
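-
-For example (the paths below are illustrative):
-
-@code
-$ cmake -D ARMADILLO_INCLUDE_DIR=/opt/armadillo/include/ \
-        -D ARMADILLO_LIBRARY=/opt/armadillo/lib/libarmadillo.so ../
-@endcode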
-
-@section build_build Building mlpack
-
-Once CMake is configured, building the library is as simple as typing 'make'.
-This will build all library components as well as 'mlpack_test'.
-
-@code
-$ make
-Scanning dependencies of target mlpack
-[  1%] Building CXX object
-src/mlpack/CMakeFiles/mlpack.dir/core/optimizers/aug_lagrangian/aug_lagrangian_test_functions.cpp.o
-<...>
-@endcode
-
-It's often useful to specify \c -jN to the \c make command, which will build on
-\c N processor cores.  That can accelerate the build significantly.
-
-You can specify individual components which you want to build, if you do not
-want to build everything in the library:
-
-@code
-$ make mlpack_pca mlpack_knn mlpack_kfn
-@endcode
-
-One particular component of interest is mlpack_test, which runs the mlpack test
-suite.  You can build this component with
-
-@code
-$ make mlpack_test
-@endcode
-
-and then run all of the tests, or an individual test suite:
-
-@code
-$ bin/mlpack_test
-$ bin/mlpack_test -t KNNTest
-@endcode
-
-If the build fails and you cannot figure out why, register an account on GitHub
-and submit an issue; the mlpack developers will quickly help you figure it
-out:
-
-https://mlpack.org/
-
-https://github.com/mlpack/mlpack
-
-Alternatively, mlpack help can be found in IRC at \#mlpack on chat.freenode.net.
-
-@section install Installing mlpack
-
-If you wish to install mlpack to the system, make sure you have root privileges
-(or write permissions to those two directories), and simply type
-
-@code
-# make install
-@endcode
-
-You can now run the executables by name; you can link against mlpack with
-\c -lmlpack, and the mlpack headers are found in \c /usr/include or
-\c /usr/local/include (depending on the system and CMake configuration).  If
-Python bindings were installed, they should be available when you start Python.
-
-@section build_run Using mlpack without installing
-
-If you would prefer to use mlpack after building but without installing it to
-the system, this is possible.  All of the command-line programs in the
-@c build/bin/ directory will run directly with no modification.
-
-For running the Python bindings from the build directory, the situation is a
-little bit different.  You will need to set the following environment variables:
-
-@code
-export LD_LIBRARY_PATH=/path/to/mlpack/build/lib/:${LD_LIBRARY_PATH}
-export PYTHONPATH=/path/to/mlpack/build/src/mlpack/bindings/python/:${PYTHONPATH}
-@endcode
-
-(Be sure to substitute the correct path to your build directory for
-`/path/to/mlpack/build/`.)
-
-Once those environment variables are set, you should be able to start a Python
-interpreter and `import mlpack`, then use the Python bindings.
-
-*/
diff -pruN 3.4.2-7/doc/guide/build_windows.hpp 4.0.1-1/doc/guide/build_windows.hpp
--- 3.4.2-7/doc/guide/build_windows.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/build_windows.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,167 +0,0 @@
-/**
- * @file build_windows.hpp
- * @author German Lancioni
- * @author Miguel Canteras
- * @author Shikhar Jaiswal
- * @author Ziyang Jiang
-
-@page build_windows Building mlpack From Source on Windows
-
-@section build_windows_intro Introduction
-
-This tutorial will show you how to build mlpack for Windows from source, so you can
-later create your own C++ applications. Before you try building mlpack, you may
-want to install mlpack using vcpkg for Windows. If you don't want to install
-using vcpkg, skip this section and continue with the build tutorial.
-
-- Install Git (https://git-scm.com/downloads and execute setup)
-
-- Install CMake (https://cmake.org/ and execute setup)
-
-- Install vcpkg (https://github.com/Microsoft/vcpkg and execute setup)
-
-- To install the mlpack library only:
-
-@code
-PS> .\vcpkg install mlpack:x64-windows
-@endcode
-
-- To install mlpack and its console programs:
-@code
-PS> .\vcpkg install mlpack[tools]:x64-windows
-@endcode
-
-After installing, in Visual Studio, you can create a new project (or open
-an existing one). The library is immediately ready to be included
-(via preprocessor directives) and used in your project without additional
-configuration.
-
-@section build_windows_env Build Environment
-
-This tutorial has been designed and tested using:
-- Windows 10
-- Visual Studio 2019 (toolset v142)
-- mlpack
-- OpenBLAS.0.2.14.1
-- boost_1_71_0-msvc-14.2-64
-- armadillo (newest version)
-- and x64 configuration
-
-The directories and paths used in this tutorial are just for reference purposes.
-
-@section build_windows_prereqs Pre-requisites
-
-- Install CMake for Windows (win64-x64 version from https://cmake.org/download/)
-and make sure you can use it from the Command Prompt (may need to add the PATH to 
-system environment variables or manually set the PATH before running CMake)
-
-- Download the latest mlpack release from here:
-<a href="https://www.mlpack.org/">mlpack website</a>
-
-@section build_windows_instructions Windows build instructions
-
-- Unzip mlpack to "C:\mlpack\mlpack"
-- Open Visual Studio and select: File > New > Project from Existing Code
- - Type of project: Visual C++
- - Project location: "C:\mlpack\mlpack"
- - Project name: mlpack
- - Finish
-- Make sure the solution configuration is "Debug" and the solution platform is "x64" for this Visual Studio project
-- We will use this Visual Studio project to get the OpenBLAS dependency in the next section
-
-@section build_windows_dependencies Dependencies
-
-<b> OpenBLAS Dependency </b>
-
-- Open the NuGet packages manager (Tools > NuGet Package Manager > Manage NuGet Packages for Solution...)
-- Click on the “Browse” tab and search for “openblas”
-- Click on OpenBlas and check the mlpack project, then click Install
-- Once it has finished installing, close Visual Studio
-
-<b> Boost Dependency </b>
-
-You can either get Boost via NuGet or you can download the prebuilt Windows binaries separately.
-This tutorial follows the second approach for simplicity.
-
-- Download the "Prebuilt Windows binaries" of the Boost library ("boost_1_71_0-msvc-14.2-64") from
-<a href="https://sourceforge.net/projects/boost/files/boost-binaries/">Sourceforge</a>
-
-@note Make sure you download the MSVC version that matches your Visual Studio
-
-- Install or unzip to "C:\boost\"
-
-<b> Armadillo Dependency </b>
-
-- Download the newest version of Armadillo from <a href="http://arma.sourceforge.net/download.html">Sourceforge</a>
-- Unzip to "C:\mlpack\armadillo"
-- Create a "build" directory into "C:\mlpack\armadillo\"
-- Open the Command Prompt and navigate to "C:\mlpack\armadillo\build"
-- Run cmake:
-
-@code
-cmake -G "Visual Studio 16 2019" -A x64 -DBLAS_LIBRARY:FILEPATH="C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DLAPACK_LIBRARY:FILEPATH="C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" ..
-@endcode
-
-@note If you are using different directory paths, a different configuration (e.g. Release)
-or a different VS version, update the cmake command accordingly. If CMake cannot identify the 
-compiler version, check if the Visual Studio compiler and Windows SDK are installed correctly.
-
-- Once it has successfully finished, open "C:\mlpack\armadillo\build\armadillo.sln"
-- Build > Build Solution
-- Once it has successfully finished, close Visual Studio
-
-@section build_windows_mlpack Building mlpack
-
-- Create a "build" directory into "C:\mlpack\mlpack\"
-- You can generate the project using either cmake via command line or GUI. If you prefer to use GUI, refer to the \ref build_windows_appendix "appendix"
-- To use the CMake command line prompt, open the Command Prompt and navigate to "C:\mlpack\mlpack\build"
-- Run cmake:
-
-@code
-cmake -G "Visual Studio 16 2019" -A x64 -DBLAS_LIBRARIES:FILEPATH="C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DLAPACK_LIBRARIES:FILEPATH="C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DARMADILLO_INCLUDE_DIR="C:/mlpack/armadillo/include" -DARMADILLO_LIBRARY:FILEPATH="C:/mlpack/armadillo/build/Debug/armadillo.lib" -DBOOST_INCLUDEDIR:PATH="C:/boost/" -DBOOST_LIBRARYDIR:PATH="C:/boost/lib64-msvc-14.2" -DDEBUG=OFF -DPROFILE=OFF ..
-@endcode
-
-@note cmake will attempt to automatically download the ensmallen dependency. If for some reason cmake can't download the dependency, you will need to manually download ensmallen from http://ensmallen.org/ and extract it to "C:\mlpack\mlpack\deps\". Then, specify the path to ensmallen using the flag: -DENSMALLEN_INCLUDE_DIR=C:/mlpack/mlpack/deps/ensmallen/include
-
-- Once CMake configuration has successfully finished, open "C:\mlpack\mlpack\build\mlpack.sln"
-- Build > Build Solution (this may be by default in Debug mode)
-- Once it has successfully finished, you will find the library files you need in: "C:\mlpack\mlpack\build\Debug" (or "C:\mlpack\mlpack\build\Release" if you changed to Release mode)
-
-You are ready to create your first application; take a look at the @ref sample_ml_app "Sample C++ ML App".
-
-@section build_windows_appendix Appendix
-
-If you prefer to use cmake GUI, follow these instructions:
-
-  - To use the CMake GUI, open "CMake".
-    - For "Where is the source code:" set `C:\mlpack\mlpack\`
-    - For "Where to build the binaries:" set `C:\mlpack\mlpack\build`
-    - Click `Configure`
-    - If there is an error and Armadillo is not found, try "Add Entry" with the
-      following variables and reconfigure:
-      - Name: `ARMADILLO_INCLUDE_DIR`; type `PATH`; value `C:/mlpack/armadillo/include/`
-      - Name: `ARMADILLO_LIBRARY`; type `FILEPATH`; value `C:/mlpack/armadillo/build/Debug/armadillo.lib`
-      - Name: `BLAS_LIBRARY`; type `FILEPATH`; value `C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a`
-      - Name: `LAPACK_LIBRARY`; type `FILEPATH`; value `C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a`
-    - If there is an error and Boost is not found, try "Add Entry" with the
-      following variables and reconfigure:
-      - Name: `BOOST_INCLUDEDIR`; type `PATH`; value `C:/boost/`
-      - Name: `BOOST_LIBRARYDIR`; type `PATH`; value `C:/boost/lib64-msvc-14.2`
-    - If Boost is still not found, try adding the following variables and
-      reconfigure:
-      - Name: `Boost_INCLUDE_DIR`; type `PATH`; value `C:/boost/`
-      - Name: `Boost_SERIALIZATION_LIBRARY_DEBUG`; type `FILEPATH`; value should be `C:/boost/lib64-msvc-14.2/boost_serialization-vc142-mt-gd-x64-1_71.lib`
-      - Name: `Boost_SERIALIZATION_LIBRARY_RELEASE`; type `FILEPATH`; value should be `C:/boost/lib64-msvc-14.2/boost_serialization-vc142-mt-x64-1_71.lib`
-      - Name: `Boost_UNIT_TEST_FRAMEWORK_LIBRARY_DEBUG`; type `FILEPATH`; value should be `C:/boost/lib64-msvc-14.2/boost_unit_test_framework-vc142-mt-gd-x64-1_71.lib`
-      - Name: `Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE`; type `FILEPATH`; value should be `C:/boost/lib64-msvc-14.2/boost_unit_test_framework-vc142-mt-x64-1_71.lib`
-    - Once CMake has configured successfully, hit "Generate" to create the `.sln` file.
-
-@section build_windows_additional_information Additional Information
-
-If you are facing issues during the build process of mlpack, you may take a look at other third-party tutorials for Windows, but they may be out of date:
-
- * <a href="https://github.com/mlpack/mlpack/wiki/WindowsBuild">Github wiki Windows Build page</a><br/>
- * <a href="http://keon.io/mlpack-on-windows">Keon's tutorial for mlpack 2.0.3</a><br/>
- * <a href="https://overdosedblog.wordpress.com/2016/08/15/once_again/">Kirizaki's tutorial for mlpack 2</a><br/>
-
-*/
diff -pruN 3.4.2-7/doc/guide/cli_quickstart.hpp 4.0.1-1/doc/guide/cli_quickstart.hpp
--- 3.4.2-7/doc/guide/cli_quickstart.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/cli_quickstart.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,211 +0,0 @@
-/**
- * @file cli_quickstart.hpp
- * @author Ryan Curtin
-
-@page cli_quickstart mlpack command-line quickstart guide
-
-@section cli_quickstart_intro Introduction
-
-This page describes how you can quickly get started using mlpack from the
-command-line and gives a few examples of usage, and pointers to deeper
-documentation.
-
-This quickstart guide is also available for @ref python_quickstart "Python",
-@ref r_quickstart "R", @ref julia_quickstart "Julia" and
-@ref go_quickstart "Go".
-
-@section cli_quickstart_install Installing mlpack
-
-Installing mlpack is straightforward and can be done with your system's
-package manager.
-
-For instance, for Ubuntu or Debian the command is simply
-
-@code{.sh}
-sudo apt-get install mlpack-bin
-@endcode
-
-On Fedora or Red Hat:
-
-@code{.sh}
-sudo dnf install mlpack
-@endcode
-
-If you use a different distribution, mlpack may be packaged under a different
-name.  And if it is not packaged, you can use a Docker image from Dockerhub:
-
-@code{.sh}
-docker run -it mlpack/mlpack /bin/bash
-@endcode
-
-This Docker image has mlpack already built and installed.
-
-If you prefer to build mlpack from scratch, see @ref build.
-
-@section cli_quickstart_example Simple mlpack quickstart example
-
-As a really simple example of how to use mlpack from the command-line, let's do
-some simple classification on a subset of the standard machine learning
-@c covertype dataset.  We'll first split the dataset into a training set and a
-testing set, then we'll train an mlpack random forest on the training data, and
-finally we'll print the accuracy of the random forest on the test dataset.
-
-You can copy-paste this code directly into your shell to run it.
-
-@code{.sh}
-# Get the dataset and unpack it.
-wget https://www.mlpack.org/datasets/covertype-small.data.csv.gz
-wget https://www.mlpack.org/datasets/covertype-small.labels.csv.gz
-gunzip covertype-small.data.csv.gz covertype-small.labels.csv.gz
-
-# Split the dataset; 70% into a training set and 30% into a test set.
-# Each of these options has a single-character shorthand, but here we type
-# them all out for clarity.
-mlpack_preprocess_split                                       \
-    --input_file covertype-small.data.csv                     \
-    --input_labels_file covertype-small.labels.csv            \
-    --training_file covertype-small.train.csv                 \
-    --training_labels_file covertype-small.train.labels.csv   \
-    --test_file covertype-small.test.csv                      \
-    --test_labels_file covertype-small.test.labels.csv        \
-    --test_ratio 0.3                                          \
-    --verbose
-
-# Train a random forest.
-mlpack_random_forest                                  \
-    --training_file covertype-small.train.csv         \
-    --labels_file covertype-small.train.labels.csv    \
-    --num_trees 10                                    \
-    --minimum_leaf_size 3                             \
-    --print_training_accuracy                         \
-    --output_model_file rf-model.bin                  \
-    --verbose
-
-# Now predict the labels of the test points and print the accuracy.
-# Also, save the test set predictions to the file 'predictions.csv'.
-mlpack_random_forest                                    \
-    --input_model_file rf-model.bin                     \
-    --test_file covertype-small.test.csv                \
-    --test_labels_file covertype-small.test.labels.csv  \
-    --predictions_file predictions.csv                  \
-    --verbose
-@endcode
-
-We can see by looking at the output that we achieve reasonably good accuracy on
-the test dataset (80%+).  The file @c predictions.csv could also be used by
-other tools; for instance, we can easily calculate the number of points that
-were predicted incorrectly:
-
-@code{.sh}
-$ diff -U 0 predictions.csv covertype-small.test.labels.csv | grep '^@@' | wc -l
-@endcode
-
-It's easy to modify the code above to do more complex things, or to use
-different mlpack learners, or to interface with other machine learning toolkits.
-
-@section cli_quickstart_whatelse What else does mlpack implement?
-
-The example above has only shown a little bit of the functionality of mlpack.
-Lots of other commands are available with different functionality.  A full list
-of commands and full documentation for each can be found on the following page:
-
- - <a href="https://mlpack.org/doc/mlpack-git/cli_documentation.html">CLI documentation</a>
-
-For more information on what mlpack does, see https://www.mlpack.org/.  Next,
-let's go through another example for providing movie recommendations with
-mlpack.
-
-@section cli_quickstart_movierecs Using mlpack for movie recommendations
-
-In this example, we'll train a collaborative filtering model using mlpack's
-@c mlpack_cf program.  We'll train this on the MovieLens dataset from
-https://grouplens.org/datasets/movielens/, and then we'll use the model that we
-train to give recommendations.
-
-You can copy-paste this code directly into the command line to run it.
-
-@code{.sh}
-wget https://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz
-wget https://www.mlpack.org/datasets/ml-20m/movies.csv.gz
-gunzip ratings-only.csv.gz
-gunzip movies.csv.gz
-
-# Hold out 10% of the dataset into a test set so we can evaluate performance.
-mlpack_preprocess_split                 \
-    --input_file ratings-only.csv       \
-    --training_file ratings-train.csv   \
-    --test_file ratings-test.csv        \
-    --test_ratio 0.1                    \
-    --verbose
-
-# Train the model.  Change the rank to increase/decrease the complexity of the
-# model.
-mlpack_cf                             \
-    --training_file ratings-train.csv \
-    --test_file ratings-test.csv      \
-    --rank 10                         \
-    --algorithm RegSVD                \
-    --output_model_file cf-model.bin  \
-    --verbose
-
-# Now query the top 10 movies for user 1.
-echo "1" > query.csv;
-mlpack_cf                             \
-    --input_model_file cf-model.bin   \
-    --query_file query.csv            \
-    --recommendations 10              \
-    --output_file recommendations.csv \
-    --verbose
-
-# Get the names of the movies for user 1.
-echo "Recommendations for user 1:"
-for i in `seq 1 10`; do
-    item=`cat recommendations.csv | awk -F',' '{ print $'$i' }'`;
-    head -n $(($item + 2)) movies.csv | tail -1 | \
-        sed 's/^[^,]*,[^,]*,//' | \
-        sed 's/\(.*\),.*$/\1/' | sed 's/"//g';
-done
-@endcode
-
-Here is some example output, showing that user 1 seems to have good taste in
-movies:
-
-@code{.unparsed}
-Recommendations for user 1:
-Casablanca (1942)
-Pan's Labyrinth (Laberinto del fauno, El) (2006)
-Godfather, The (1972)
-Answer This! (2010)
-Life Is Beautiful (La Vita è bella) (1997)
-Adventures of Tintin, The (2011)
-Dark Knight, The (2008)
-Out for Justice (1991)
-Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
-Schindler's List (1993)
-@endcode
-
-@section cli_quickstart_nextsteps Next steps with mlpack
-
-Now that you have done some simple work with mlpack, you have seen how it can
-easily plug into a data science production workflow for the command line.  A
-great thing to do next would be to look at more documentation for the mlpack
-command-line programs:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/cli_documentation.html">mlpack
-   command-line program documentation</a>
-
-Also, mlpack is much more flexible from C++ and allows much greater
-functionality.  So, more complicated tasks are possible if you are willing to
-write C++.  To get started learning about mlpack in C++, the following resources
-might be helpful:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/tutorials.html">mlpack
-   C++ tutorials</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/build.html">mlpack
-   build and installation guide</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/sample.html">Simple
-   sample C++ mlpack programs</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/index.html">mlpack
-   Doxygen documentation homepage</a>
-
- */
diff -pruN 3.4.2-7/doc/guide/cv.hpp 4.0.1-1/doc/guide/cv.hpp
--- 3.4.2-7/doc/guide/cv.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/cv.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,372 +0,0 @@
-namespace mlpack {
-namespace cv {
-
-/*! @page cv Cross-Validation
-
-@section cvintro Introduction
-
-@b mlpack implements cross-validation support for its learning algorithms, for a
-variety of performance measures.  Cross-validation is useful for determining an
-estimate of how well the learner will generalize to unseen test data.  It is a
-commonly used part of the data science pipeline.
-
-In short, given some learner and some performance measure, we wish to get an
-average of the performance measure given different splits of the dataset into
-training data and validation data.  The learner is trained on the training data,
-and the performance measure is evaluated on the validation data.
-
-mlpack currently implements two easy-to-use forms of cross-validation:
-
- - @b simple @b cross-validation, where we simply desire the performance measure
-   on a single split of the data into a training set and validation set
-
- - @b k-fold @b cross-validation, where we split the data k ways and desire the
-   average performance measure across the k splits of the data
-
-In this tutorial we will see the usage examples and details of the
-cross-validation module.  Because the cross-validation code is generic and can
-be used with any learner and performance measure, any use of the
-cross-validation code in mlpack has to be in C++.
-
-This tutorial is split into the following sections:
-
- - @ref cvbasic Simple cross-validation examples
-   - @ref cvbasic_ex_1 10-fold cross-validation on softmax regression
-   - @ref cvbasic_ex_2 10-fold cross-validation on weighted decision trees
-   - @ref cvbasic_ex_3 10-fold cross-validation with categorical decision trees
-   - @ref cvbasic_ex_4 Simple cross-validation for linear regression
- - @ref cvbasic_metrics Performance measures
- - @ref cvbasic_api The \c KFoldCV and \c SimpleCV classes
- - @ref cvbasic_further Further reference
-
-@section cvbasic Simple cross-validation examples
-
-@subsection cvbasic_ex_1 10-fold cross-validation on softmax regression
-
-Suppose we have some data to train and validate on, as defined below:
-
-@code
-  // 100-point 6-dimensional random dataset.
-  arma::mat data = arma::randu<arma::mat>(6, 100);
-  // Random labels in the [0, 4] interval.
-  arma::Row<size_t> labels =
-      arma::randi<arma::Row<size_t>>(100, arma::distr_param(0, 4));
-  size_t numClasses = 5;
-@endcode
-
-The code above generates a 100-point random 6-dimensional dataset with 5
-classes.
-
-To run 10-fold cross-validation for softmax regression with accuracy as a
-performance measure, we can write the following piece of code.
-
-@code
-  KFoldCV<SoftmaxRegression, Accuracy> cv(10, data, labels, numClasses);
-  double lambda = 0.1;
-  double softmaxAccuracy = cv.Evaluate(lambda);
-@endcode
-
-Note that the \c Evaluate method of \c KFoldCV takes any hyperparameters of an
-algorithm---that is, anything that is not \c data, \c labels, \c numClasses,
-\c datasetInfo, or \c weights (those last three may not be present for every
-algorithm type).  To be more specific, in this example the \c Evaluate method
-relies on the following \ref regression::SoftmaxRegression "SoftmaxRegression"
-constructor:
-
-@code
-  template<typename OptimizerType = mlpack::optimization::L_BFGS>
-  SoftmaxRegression(const arma::mat& data,
-                    const arma::Row<size_t>& labels,
-                    const size_t numClasses,
-                    const double lambda = 0.0001,
-                    const bool fitIntercept = false,
-                    OptimizerType optimizer = OptimizerType());
-@endcode
-
-which has the parameter \c lambda after three conventional arguments (\c data,
-\c labels and \c numClasses).  We can skip passing \c fitIntercept and
-\c optimizer since they have default values.  (Technically, we don't even need
-to pass \c lambda since it has a default value.)
-
-In general, to cross-validate, you specify the machine learning algorithm and
-metric you are going to use, pass the conventional data-related parameters into
-one of the cross-validation constructors, and pass all other parameters (which
-are generally hyperparameters) into the \c Evaluate() method.
-
-@subsection cvbasic_ex_2 10-fold cross-validation on weighted decision trees
-
-In the following example we will cross-validate
-\ref tree::DecisionTree "DecisionTree" with weights.  This is very similar to
-the previous example, except that we also have instance weights for each point
-in the dataset.  We can generate weights for the dataset from the previous
-example with the code below:
-
-@code
-  // Random weights for every point from the code snippet above.
-  arma::rowvec weights = arma::randu<arma::rowvec>(100);
-@endcode
-
-Given those weights for each point, we can now perform cross-validation by also
-passing the weights to the constructor of \c KFoldCV:
-
-@code
-  KFoldCV<DecisionTree<>, Accuracy> cv2(10, data, labels, numClasses, weights);
-  size_t minimumLeafSize = 8;
-  double weightedDecisionTreeAccuracy = cv2.Evaluate(minimumLeafSize);
-@endcode
-
-As with the previous example, internally this call to \c cv2.Evaluate() relies
-on the following \ref tree::DecisionTree "DecisionTree" constructor:
-
-@code
-  template<typename MatType, typename LabelsType, typename WeightsType>
-  DecisionTree(MatType&& data,
-               LabelsType&& labels,
-               const size_t numClasses,
-               WeightsType&& weights,
-               const size_t minimumLeafSize = 10,
-               const std::enable_if_t<arma::is_arma_type<
-                   typename std::remove_reference<WeightsType>::type>::value>*
-                    = 0);
-@endcode
-
-@subsection cvbasic_ex_3 10-fold cross-validation with categorical decision trees
-
-\ref tree::DecisionTree "DecisionTree" models can be constructed in multiple
-other ways. For example, if we have a dataset with both categorical and
-numerical features, we can also perform cross-validation by using the associated
-\c data::DatasetInfo object.  Thus, given some \c data::DatasetInfo object
-called \c datasetInfo (that perhaps was produced by a call to \c data::Load() ),
-we can perform k-fold cross-validation in a similar manner to the other
-examples:
-
-@code
-  KFoldCV<DecisionTree<>, Accuracy> cv3(10, data, datasetInfo, labels,
-      numClasses);
-  double decisionTreeWithDIAccuracy = cv3.Evaluate(minimumLeafSize);
-@endcode
-
-This particular call to \c cv3.Evaluate() relies on the following
-\ref tree::DecisionTree "DecisionTree" constructor:
-
-@code
-  template<typename MatType, typename LabelsType>
-  DecisionTree(MatType&& data,
-               const data::DatasetInfo& datasetInfo,
-               LabelsType&& labels,
-               const size_t numClasses,
-               const size_t minimumLeafSize = 10);
-@endcode
-
-@subsection cvbasic_ex_4 Simple cross-validation for linear regression
-
-\c SimpleCV has the same interface as \c KFoldCV, except it takes as one of its
-arguments a proportion (from 0 to 1) of data used as a validation set. For
-example, to validate \ref regression::LinearRegression "LinearRegression" with
-20\% of the data used in the validation set we can write the following code.
-
-@code
-  // Random responses for every point from the code snippet in the beginning of
-  // the tutorial.
-  arma::rowvec responses = arma::randu<arma::rowvec>(100);
-
-  SimpleCV<LinearRegression, MSE> cv4(0.2, data, responses);
-  double lrLambda = 0.05;
-  double lrMSE = cv4.Evaluate(lrLambda);
-@endcode
-
-@section cvbasic_metrics Performance measures
-
-The cross-validation classes require a performance measure to be specified.
-\b mlpack has a number of performance measures implemented; below is a list:
-
- - mlpack::cv::Accuracy: a simple measure of accuracy
- - mlpack::cv::F1: the F1 score; depends on an averaging strategy
- - mlpack::cv::MSE: mean squared error (for regression problems)
- - mlpack::cv::Precision: the precision, for classification problems
- - mlpack::cv::Recall: the recall, for classification problems
-
-In addition, it is not difficult to implement a custom performance measure.  A
-class following the structure below can be used:
-
-@code
-class CustomMeasure
-{
-  //
-  // This evaluates the metric given a trained model and a set of data (with
-  // labels or responses) to evaluate on.  The data parameter will be a type of
-  // Armadillo matrix, and the labels will be the labels that go with the model.
-  //
-  // If you know that your model is a classification model (and thus that
-  // ResponsesType will be arma::Row<size_t>), it is ok to replace the
-  // ResponsesType template parameter with arma::Row<size_t>.
-  //
-  template<typename MLAlgorithm, typename DataType, typename ResponsesType>
-  static double Evaluate(MLAlgorithm& model,
-                         const DataType& data,
-                         const ResponsesType& labels)
-  {
-    // Inside the method you should call model.Predict() and compare the
-    // values with the labels, in order to get the desired performance measure
-    // and return it.
-  }
-};
-@endcode
-
-Once this is implemented, then \c CustomMeasure (or whatever the class is
-called) is easy to use as a custom performance measure with \c KFoldCV or
-\c SimpleCV.
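-
-For example, a minimal accuracy-like measure for classifiers (a sketch,
-assuming the model provides the usual mlpack @c Classify() overload shown
-below) might be:
-
-@code
-class CustomAccuracy
-{
- public:
-  template<typename MLAlgorithm, typename DataType>
-  static double Evaluate(MLAlgorithm& model,
-                         const DataType& data,
-                         const arma::Row<size_t>& labels)
-  {
-    arma::Row<size_t> predictions;
-    model.Classify(data, predictions);
-
-    // Return the fraction of correctly predicted labels.
-    return double(arma::accu(predictions == labels)) / labels.n_elem;
-  }
-};
-@endcode
-
-This could then be used as, e.g., <tt>KFoldCV<DecisionTree<>,
-CustomAccuracy></tt>.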
-
-@section cvbasic_api The KFoldCV and SimpleCV classes
-
-This section provides details about the \c KFoldCV and \c SimpleCV classes.
-The cross-validation infrastructure is based on heavy amounts of template
-metaprogramming, so that any \b mlpack learner and any performance measure can
-be used.  Both classes have two required template parameters and one optional
-parameter:
-
- - \c MLAlgorithm: the type of learner to be used
- - \c Metric: the performance measure to be evaluated
- - \c MatType: the type of matrix used to store the data
-
-In addition, there are two more template parameters, but these are automatically
-extracted from the given \c MLAlgorithm class, and users should not need to
-specify these parameters except when using an unconventional type like
-\c arma::fmat for data points.
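-
-For example, here is a sketch of cross-validating with 32-bit floating point
-data, passing \c arma::fmat as the third template parameter (it assumes the
-labels and number of classes are defined as in the earlier examples):
-
-@code
-  arma::fmat floatData = arma::randu<arma::fmat>(10, 100);
-  KFoldCV<DecisionTree<>, Accuracy, arma::fmat> cv(10, floatData, labels,
-      numClasses);
-@endcode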
-
-The general structure of the \c KFoldCV and \c SimpleCV classes is split into
-two parts:
-
- - The constructor: create the object, and store the data for the \c MLAlgorithm
-        training.
- - The \c Evaluate() method: take any non-data parameters for the
-        \c MLAlgorithm and calculate the desired performance measure.
-
-This split is important because it defines the API: all data-related parameters
-are passed to the constructor, whereas algorithm hyperparameters are passed to
-the \c Evaluate() method.
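-
-For instance, continuing the decision tree example from earlier (a sketch,
-with \c data, \c labels, and \c numClasses as before):
-
-@code
-  // Data-related parameters are given to the constructor...
-  KFoldCV<DecisionTree<>, Accuracy> cv(10, data, labels, numClasses);
-  // ...while hyperparameters, here the minimum leaf size, are given to
-  // Evaluate().
-  double accuracy = cv.Evaluate(5);
-@endcode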
-
-@subsection cvbasic_api_constructor The KFoldCV and SimpleCV constructors
-
-There are six constructors available for \c KFoldCV and \c SimpleCV, each
-tailored for a different learning situation.  Each is given below for the
-\c KFoldCV class, but the same constructors are also available for the
-\c SimpleCV class, with the exception that instead of specifying \c k, the
-number of folds, the \c SimpleCV class takes a parameter between 0 and 1
-specifying the percentage of the dataset to use as a validation set.
-
- - `KFoldCV(k, xs, ys)`: this is for unweighted regression applications and
-        two-class classification applications; \c xs is the dataset and \c ys
-        are the responses or labels for each point in the dataset.
-
- - `KFoldCV(k, xs, ys, numClasses)`: this is for unweighted classification
-        applications; \c xs is the dataset, \c ys are the class labels for each
-        data point, and \c numClasses is the number of classes in the dataset.
-
- - `KFoldCV(k, xs, datasetInfo, ys, numClasses)`: this is for unweighted
-        categorical/numeric classification applications; \c xs is the dataset,
-        \c datasetInfo is a data::DatasetInfo object that holds the types of
-        each dimension in the dataset, \c ys are the class labels for each data
-        point, and \c numClasses is the number of classes in the dataset.
-
- - `KFoldCV(k, xs, ys, weights)`: this is for weighted regression or
-        two-class classification applications; \c xs is the dataset, \c ys are
-        the responses or labels for each point in the dataset, and \c weights
-        are the weights for each point in the dataset.
-
- - `KFoldCV(k, xs, ys, numClasses, weights)`: this is for weighted
-        classification applications; \c xs is the dataset, \c ys are the class
-        labels for each point in the dataset; \c numClasses is the number of
-        classes in the dataset, and \c weights holds the weights for each point
-        in the dataset.
-
- `KFoldCV(k, xs, datasetInfo, ys, numClasses, weights)`: this is for
-        weighted categorical/numeric classification applications; \c xs is the
-        dataset, \c datasetInfo is a data::DatasetInfo object that holds the
-        types of each dimension in the dataset, \c ys are the class labels for
-        each data point, \c numClasses is the number of classes in the dataset,
-        and \c weights holds the weights for each point in the dataset.
-
-Note that the constructor you should use is the one that most closely
-matches the constructor of the machine learning algorithm you would like to
-evaluate.  So, for instance, if you are doing multi-class softmax
-regression, you could call the constructor
-\c "SoftmaxRegression(xs, ys, numClasses)".  Therefore, for \c KFoldCV you would
-call the constructor \c "KFoldCV(k, xs, ys, numClasses)" and for \c SimpleCV you
-would call the constructor \c "SimpleCV(pct, xs, ys, numClasses)".
-
-@subsection cvbasic_api_evaluate The Evaluate() method
-
-The other method that \c KFoldCV and \c SimpleCV have is the method to
-actually calculate the performance measure: \c Evaluate().  The \c Evaluate()
-method takes any hyperparameters that would follow the data arguments to the
-constructor or \c Train() method of the given \c MLAlgorithm.  The
-\c Evaluate() method takes no more arguments than that, and returns the
-desired performance measure on the dataset.
-
-Therefore, let us suppose that we are interested in cross-validating the
-performance of a softmax regression model, and that we have constructed
-the appropriate \c KFoldCV object using the code below:
-
-@code
-KFoldCV<SoftmaxRegression, Precision> cv(k, data, labels, numClasses);
-@endcode
-
-The \ref regression::SoftmaxRegression "SoftmaxRegression" class has the
-constructor
-
-@code
-  template<typename OptimizerType = mlpack::optimization::L_BFGS>
-  SoftmaxRegression(const arma::mat& data,
-                    const arma::Row<size_t>& labels,
-                    const size_t numClasses,
-                    const double lambda = 0.0001,
-                    const bool fitIntercept = false,
-                    OptimizerType optimizer = OptimizerType());
-@endcode
-
-Note that all parameters after \c numClasses are optional.  This means that
-we can specify none or any of them in our call to \c Evaluate().  Below is some
-example code showing three different ways we can call \c Evaluate() with the
-\c cv object from the code snippet above.
-
-@code
-// First, call with all defaults.
-double result1 = cv.Evaluate();
-
-// Next, call with lambda set to 0.1 and fitIntercept set to true.
-double result2 = cv.Evaluate(0.1, true);
-
-// Lastly, create a custom optimizer to use for optimization, and use a lambda
-// value of 0.5 and fit no intercept.
-optimization::SGD<> sgd(0.05, 50000); // Step size of 0.05, 50k max iterations.
-double result3 = cv.Evaluate(0.5, false, sgd);
-@endcode
-
-The same general idea applies to any \c MLAlgorithm: all hyperparameters must be
-passed to the \c Evaluate() method of \c KFoldCV or \c SimpleCV.
-
-@section cvbasic_further Further references
-
-For further documentation, please see the associated Doxygen documentation for
-each of the relevant classes:
-
- - mlpack::cv::SimpleCV
- - mlpack::cv::KFoldCV
- - mlpack::cv::Accuracy
- - mlpack::cv::F1
- - mlpack::cv::MSE
- - mlpack::cv::Precision
- - mlpack::cv::Recall
-
-If you are interested in implementing a different cross-validation strategy than
-k-fold cross-validation or simple cross-validation, take a look at the
-implementations of each of those classes to guide your implementation.
-
-In addition, the @ref hpt "hyperparameter tuner" documentation may also be
-relevant.
-
-*/
-
-} // namespace cv
-} // namespace mlpack
diff -pruN 3.4.2-7/doc/guide/formats.hpp 4.0.1-1/doc/guide/formats.hpp
--- 3.4.2-7/doc/guide/formats.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/formats.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,535 +0,0 @@
-/*! @page formatdoc File formats and loading data in mlpack
-
-@section formatintro Introduction
-
-mlpack supports a wide variety of data (including images) and model formats for use in both its
-command-line programs and in C++ programs using mlpack via the
-mlpack::data::Load() function.  This tutorial discusses the formats that are
-supported and how to use them.
-
-@section toc_tut Table of Contents
-
-This tutorial is split into the following sections:
-
- - \ref formatintro
- - \ref toc_tut
- - Data
-    - Data Formats
-        - \ref formatsimple
-        - \ref formattypes
-        - \ref formatcpp
-        - \ref sparseload
-        - \ref formatcat
-        - \ref formatcatcpp
-    - Image Support
-        - \ref intro_imagetut
-        - \ref model_api_imagetut
-        - \ref imageinfo_api_imagetut
-        - \ref load_api_imagetut
-        - \ref save_api_imagetut
- - Models
-    - \ref formatmodels
-    - \ref formatmodelscpp
- - \ref formatfinal
-
-@section formatsimple Simple examples to load data in C++
-
-The example code snippets below load data from different formats into an
-Armadillo matrix object (\c arma::mat) or model when using C++.
-
-@code
-using namespace mlpack;
-
-arma::mat matrix1;
-data::Load("dataset.csv", matrix1);
-@endcode
-
-@code
-using namespace mlpack;
-
-arma::mat matrix2;
-data::Load("dataset.bin", matrix2);
-@endcode
-
-@code
-using namespace mlpack;
-
-arma::mat matrix3;
-data::Load("dataset.h5", matrix3);
-@endcode
-
-@code
-using namespace mlpack;
-
-// ARFF loading is a little different, since sometimes mapping has to be done
-// for string types.
-arma::mat matrix4;
-data::DatasetInfo datasetInfo;
-data::Load("dataset.arff", matrix4, datasetInfo);
-
-// The datasetInfo object now holds information about each dimension.
-@endcode
-
-@code
-using namespace mlpack;
-
-regression::LogisticRegression lr;
-data::Load("model.bin", "logistic_regression_model", lr);
-@endcode
-
-@section formattypes Supported dataset types
-
-Datasets in mlpack are represented internally as sparse or dense numeric
-matrices (specifically, as \c arma::mat or \c arma::sp_mat or similar).  This
-means that when datasets are loaded from file, they must be converted to a
-suitable numeric representation.  Therefore, in general, datasets on disk should
-contain only numeric features in order to be loaded successfully by mlpack.
-
-The types of datasets that mlpack can load are roughly the same as the types of
-matrices that Armadillo can load.  However, the load functionality that mlpack
-provides <b>only supports loading dense datasets</b>.  When datasets are loaded
-by mlpack, <b>the file's type is detected using the file's extension</b>.
-mlpack supports the following file types:
-
- - csv (comma-separated values), denoted by .csv or .txt
- - tsv (tab-separated values), denoted by .tsv, .csv, or .txt
- - ASCII (raw ASCII, with space-separated values), denoted by .txt
- - Armadillo ASCII (Armadillo's text format with a header), denoted by .txt
- - PGM, denoted by .pgm
- - PPM, denoted by .ppm
- - Armadillo binary, denoted by .bin
- - Raw binary, denoted by .bin <b>(note: this will be loaded as
-   one-dimensional data, which is likely not what is desired.)</b>
- - HDF5, denoted by .hdf, .hdf5, .h5, or .he5 (<b>note: HDF5 must be enabled
-   in the Armadillo configuration</b>)
- - ARFF, denoted by .arff (<b>note: this is not supported by all mlpack
-   command-line programs </b>; see \ref formatcat)
-
-Datasets that are loaded by mlpack should be stored with <b>one row for
-one point</b> and <b>one column for one dimension</b>.  Therefore, a dataset
-with three two-dimensional points \f$(0, 1)\f$, \f$(3, 1)\f$, and \f$(5, -5)\f$
-would be stored in a csv file as:
-
-\code
-0, 1
-3, 1
-5, -5
-\endcode
-
-As noted earlier, for command-line programs, the format is automatically
-detected at load time.  Therefore, a dataset can be loaded in many ways:
-
-\code
-$ mlpack_logistic_regression -t dataset.csv -v
-[INFO ] Loading 'dataset.csv' as CSV data.  Size is 32 x 37749.
-...
-
-$ mlpack_logistic_regression -t dataset.txt -v
-[INFO ] Loading 'dataset.txt' as raw ASCII formatted data.  Size is 32 x 37749.
-...
-
-$ mlpack_logistic_regression -t dataset.h5 -v
-[INFO ] Loading 'dataset.h5' as HDF5 data.  Size is 32 x 37749.
-...
-\endcode
-
-Similarly, the format to save to is detected by the extension of the given
-filename.
-
-@section formatcpp Loading simple matrices in C++
-
-When C++ is being written, the mlpack::data::Load() and mlpack::data::Save()
-functions are used to load and save datasets, respectively.  These functions
-should be preferred over the built-in Armadillo \c .load() and \c .save()
-functions.
-
-Matrices in mlpack are column-major, meaning that each column should correspond
-to a point in the dataset and each row should correspond to a dimension; for
-more information, see \ref matrices.  This is at odds with how the data is
-stored in files; therefore, a transposition is required during load and save.
-The mlpack::data::Load() and mlpack::data::Save() functions do this
-automatically (unless otherwise specified), which is why they are preferred over
-the Armadillo functions.
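-
-For instance, if a file already stores one point per column, so that no
-transposition is needed, the optional arguments can be used to disable it.
-This is a sketch: the third argument controls whether a failure to load throws
-an exception, and the fourth controls transposition.
-
-\code
-arma::mat dataset;
-mlpack::data::Load("dataset.csv", dataset, false, false);
-\endcode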
-
-To load a matrix from file, the call is straightforward.  After creating a
-matrix object, the data can be loaded:
-
-\code
-arma::mat dataset; // The data will be loaded into this matrix.
-mlpack::data::Load("dataset.csv", dataset);
-\endcode
-
-Saving matrices is equally straightforward.  The code below generates a random
-matrix with 10 points in 3 dimensions and saves it to a file as HDF5.
-
-\code
-// 3 dimensions (rows), with 10 points (columns).
-arma::mat dataset = arma::randu<arma::mat>(3, 10);
-mlpack::data::Save("dataset.h5", dataset);
-\endcode
-
-As with the command-line programs, the type of data to be loaded is
-automatically detected from the filename extension.  For more details, see the
-mlpack::data::Load() and mlpack::data::Save() documentation.
-
-@section sparseload Dealing with sparse matrices
-
-As mentioned earlier, support for loading sparse matrices in mlpack is not
-available at this time.  To use a sparse matrix with mlpack code, you will have
-to write a C++ program instead of using any of the command-line tools, because
-the command-line tools all use dense datasets internally.  (There is one
-exception: the \c mlpack_cf program, for collaborative filtering, loads sparse
-coordinate lists.)
-
-In addition, the \c mlpack::data::Load() function does not support loading any
-sparse format; so the best idea is to use undocumented Armadillo functionality
-to load coordinate lists.  Suppose you have a coordinate list file like the one
-below:
-
-\code
-$ cat cl.csv
-0 0 0.332
-1 3 3.126
-4 4 1.333
-\endcode
-
-This represents a 5x5 matrix with three nonzero elements.  We can load this
-using Armadillo:
-
-\code
-arma::sp_mat matrix;
-matrix.load("cl.csv", arma::coord_ascii);
-matrix = matrix.t(); // We must transpose after load!
-\endcode
-
-The transposition after loading is necessary if the coordinate list is in
-row-major format (that is, if each row in the matrix represents a point and each
-column represents a feature).  Be sure that the matrix you use with mlpack
-methods has points as columns and features as rows!  See \ref matrices for more
-information.
-
-@section formatcat Categorical features and command line programs
-
-In some situations it is useful to represent data not just as a numeric matrix
-but also as categorical data (i.e. with numeric but unordered categories).  This
-support is useful for, e.g., decision trees and other models that support
-categorical features.
-
-Categorical data might look like this (in CSV format):
-
-\code
-0, 1, "true", 3
-5, -2, "false", 5
-2, 2, "true", 4
-3, -1, "true", 3
-4, 4, "not sure", 0
-0, 7, "false", 6
-\endcode
-
-In the example above, the third dimension (which takes values "true", "false",
-and "not sure") is categorical.  mlpack can load and work with this data, but
-the strings must be mapped to numbers, because all datasets in mlpack are
-represented by Armadillo matrix objects.
-
-From the perspective of an mlpack command-line program, this support is
-transparent; mlpack will attempt to load the data file, and if it detects
-entries in the file that are not numeric, it will map them to numbers and then
-print, for each dimension, the number of mappings.  For instance, if we run the
-\c mlpack_hoeffding_tree program (which supports categorical data) on the
-dataset above (stored as dataset.csv), we receive this output during loading:
-
-\code
-$ mlpack_hoeffding_tree -t dataset.csv -l dataset.labels.csv -v
-[INFO ] Loading 'dataset.csv' as CSV data.  Size is 6 x 4.
-[INFO ] 0 mappings in dimension 0.
-[INFO ] 0 mappings in dimension 1.
-[INFO ] 3 mappings in dimension 2.
-[INFO ] 0 mappings in dimension 3.
-...
-\endcode
-
-Currently, only the \c mlpack_hoeffding_tree program supports loading
-categorical data, and this is also the only program that supports loading an
-ARFF dataset.
-
-@section formatcatcpp Categorical features and C++
-
-When writing C++, loading categorical data is slightly more tricky: the mappings
-from strings to integers must be preserved.  This is the purpose of the
-mlpack::data::DatasetInfo class, which stores these mappings and can be used at
-load and save time to apply and un-apply them.
-
-When loading a dataset with categorical data, the overload of
-mlpack::data::Load() that takes an mlpack::data::DatasetInfo object should be
-used.  An example is below:
-
-\code
-arma::mat dataset; // Load into this matrix.
-mlpack::data::DatasetInfo info; // Store information about dataset in this.
-
-// Load the ARFF dataset.
-mlpack::data::Load("dataset.arff", dataset, info);
-\endcode
-
-After this load completes, the \c info object will hold the information about
-the mappings necessary to load the dataset.  It is possible to re-use the
-\c DatasetInfo object to load another dataset with the same mappings.  This is
-useful when, for instance, both a training and test set are being loaded, and it
-is necessary that the mappings from strings to integers for categorical features
-are identical.  An example is given below.
-
-\code
-arma::mat trainingData; // Load training data into this matrix.
-arma::mat testData; // Load test data into this matrix.
-mlpack::data::DatasetInfo info; // This will store the mappings.
-
-// Load the training data, and create the mappings in the 'info' object.
-mlpack::data::Load("training_data.arff", trainingData, info);
-
-// Load the test data, but re-use the 'info' object with the already initialized
-// mappings.  This means that the same mappings will be applied to the test set.
-mlpack::data::Load("test_data.arff", testData, info);
-\endcode
-
-When saving data, pass the same DatasetInfo object it was loaded with in order
-to unmap the categorical features correctly.  The example below demonstrates
-this functionality: it loads the dataset, increments all non-categorical
-features by 1, and then saves the dataset with the same DatasetInfo it was
-loaded with.
-
-\code
-arma::mat dataset; // Load data into this matrix.
-mlpack::data::DatasetInfo info; // This will store the mappings.
-
-// Load the dataset.
-mlpack::data::Load("dataset.tsv", dataset, info);
-
-// Loop over all features, and add 1 to all non-categorical features.
-for (size_t i = 0; i < info.Dimensionality(); ++i)
-{
-  // The Type() function returns whether or not the data is numeric or
-  // categorical.
-  if (info.Type(i) != mlpack::data::Datatype::categorical)
-    dataset.row(i) += 1.0;
-}
-
-// Save the modified dataset using the same DatasetInfo.
-mlpack::data::Save("dataset-new.tsv", dataset, info);
-\endcode
-
-There is more functionality to the DatasetInfo class; for more information, see
-the mlpack::data::DatasetInfo documentation.
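-
-As a quick sketch of that extra functionality, the number of categories that
-were mapped in a dimension can be queried with \c NumMappings().  (This
-assumes the categorical example dataset from earlier, where dimension 2 is
-categorical.)
-
-\code
-// Print the number of distinct categories mapped in dimension 2.
-std::cout << info.NumMappings(2) << " categories in dimension 2." << std::endl;
-\endcode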
-
-@section intro_imagetut Loading and Saving Images
-
-Image datasets are becoming increasingly popular in deep learning.
-
-mlpack's image loading and saving functionality is based on the stb library
-(https://github.com/nothings/stb).
-
-@section model_api_imagetut Image Utilities API
-
-The image utilities support loading and saving of images.
-
-It supports filetypes "jpg", "png", "tga", "bmp", "psd", "gif", "hdr", "pic",
-"pnm" for loading and "jpg", "png", "tga", "bmp", "hdr" for saving.
-
-The associated datatype is unsigned char, supporting RGB values in the range
-0-255.  To feed the data into a network, a typecast to another `arma::Mat`
-type may be required.  Images are stored in the matrix as
-(width * height * channels, NumberOfImages).  Therefore @c imageMatrix.col(0)
-would be the first image if images are loaded into @c imageMatrix.
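-
-As a sketch of how to index into this layout (assuming stb's interleaved
-ordering, where the channel varies fastest, then the column, then the row):
-
-@code
-// Value of channel c at pixel (x, y) of image i, where 'width' and 'channels'
-// are the image's width and channel count.
-const size_t row = (y * width + x) * channels + c;
-const unsigned char value = imageMatrix(row, i);
-@endcode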
-
-@section imageinfo_api_imagetut Accessing Metadata of Images: ImageInfo
-
-The ImageInfo class contains the metadata of the images.
-@code
-ImageInfo(const size_t width,
-          const size_t height,
-          const size_t channels,
-          const size_t quality = 90);
-@endcode
-
-The @c quality member controls the compression level used if the image is
-saved as `jpg`; it takes values from 0 to 100.
-
-@section load_api_imagetut Loading Images in C++
-
-Standalone loading of images.
-
-@code
-template<typename eT>
-bool Load(const std::string& filename,
-          arma::Mat<eT>& matrix,
-          ImageInfo& info,
-          const bool fatal);
-@endcode
-
-The example below loads a test image.  It also fills the ImageInfo object with
-the image's metadata.
-
-@code
-data::ImageInfo info;
-data::Load("test_image.png", matrix, info, false);
-@endcode
-
-Constructing an ImageInfo directly requires the width, height, and number of
-channels of the image:
-
-@code
-size_t height = 64, width = 64, channels = 1;
-data::ImageInfo info(width, height, channels);
-@endcode
-
-More than one image can be loaded into the same matrix.
-
-Loading multiple images:
-
-@code
-template<typename eT>
-bool Load(const std::vector<std::string>& files,
-          arma::Mat<eT>& matrix,
-          ImageInfo& info,
-          const bool fatal);
-@endcode
-
-@code
-data::ImageInfo info;
-std::vector<std::string> files{"test_image1.bmp", "test_image2.bmp"};
-data::Load(files, matrix, info, false);
-@endcode
-
-@section save_api_imagetut Saving Images in C++
-
-Saving images expects a matrix of type unsigned char in the form
-(width * height * channels, NumberOfImages).  Just like loading, it can be
-used to save one image or multiple images.  Besides the image data, it also
-expects the shape of the image as input (width, height, channels).
-
-Saving one image:
-
-@code
-   template<typename eT>
-   bool Save(const std::string& filename,
-             arma::Mat<eT>& matrix,
-             ImageInfo& info,
-             const bool fatal,
-             const bool transpose);
-@endcode
-
-@code
-  data::ImageInfo info;
-  info.width = info.height = 25;
-  info.channels = 3;
-  info.quality = 90;
-  data::Save("test_image.bmp", matrix, info, false, true);
-@endcode
-
-If the matrix contains more than one image, only the first one is saved.
-
-Saving multiple images:
-
-@code
-   template<typename eT>
-   bool Save(const std::vector<std::string>& files,
-             arma::Mat<eT>& matrix,
-             ImageInfo& info,
-             const bool fatal,
-             const bool transpose);
-@endcode
-
-@code
-  data::ImageInfo info;
-  info.width = info.height = 25;
-  info.channels = 3;
-  info.quality = 90;
-  std::vector<std::string> files{"test_image1.bmp", "test_image2.bmp"};
-  data::Save(files, matrix, info, false, true);
-@endcode
-
-Multiple images are saved according to the vector of filenames specified.
-
-@section formatmodels Loading and saving models
-
-Using \c boost::serialization, mlpack is able to load and save machine learning
-models with ease.  These models can currently be saved in three formats:
-
- - binary (.bin); this is not human-readable, but it is small
- - text (.txt); this is sort of human-readable and relatively small
- - xml (.xml); this is human-readable but very verbose and large
-
-The type of file to save is determined by the given file extension, as with the
-other loading and saving functionality in mlpack.  Below is an example where a
-dataset stored as TSV and labels stored as ASCII text are used to train a
-logistic regression model, which is then saved to model.xml.
-
-\code
-$ mlpack_logistic_regression -t training_dataset.tsv -l training_labels.txt \
-> -M model.xml
-\endcode
-
-Many mlpack command-line programs have support for loading and saving models
-through the \c --input_model_file (\c -m) and \c --output_model_file (\c -M)
-options; for more information, see the documentation for each program
-(accessible by passing \c --help as a parameter).
-
-@section formatmodelscpp Loading and saving models in C++
-
-mlpack uses the \c boost::serialization library internally to perform loading
-and saving of models, and provides convenience overloads of mlpack::data::Load()
-and mlpack::data::Save() to load and save these models.
-
-To be serializable, a class must implement the method
-
-\code
-template<typename Archive>
-void serialize(Archive& ar, const unsigned int version);
-\endcode
-
-\note
-For more information on this method and how it works, see the
-boost::serialization documentation at
-http://www.boost.org/libs/serialization/doc/.
-
-\note
-Examples of serialize() methods can be found in most classes; one fairly
-straightforward example is found \ref mlpack::math::Range::serialize()
-"in the mlpack::math::Range class".  A more complex example is found
-\ref mlpack::tree::BinarySpaceTree::serialize() "in the mlpack::tree::BinarySpaceTree class".
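-
-A minimal sketch of a serializable class is below; it uses the
-\c BOOST_SERIALIZATION_NVP macro to provide the name-value pairing needed by
-the XML archive format.  (The class and its members here are hypothetical.)
-
-\code
-class ExampleModel
-{
- public:
-  template<typename Archive>
-  void serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & BOOST_SERIALIZATION_NVP(weights);
-    ar & BOOST_SERIALIZATION_NVP(bias);
-  }
-
- private:
-  arma::vec weights;
-  double bias;
-};
-\endcode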
-
-Using the mlpack::data::Load() and mlpack::data::Save() classes is easy if the
-type being saved has a \c serialize() method implemented: simply call either
-function with a filename, a name for the object to save, and the object itself.
-The example below, for instance, creates an mlpack::math::Range object and saves
-it as range.txt.  Then, that range is loaded from file into another
-mlpack::math::Range object.
-
-\code
-// Create range and save it.
-mlpack::math::Range r(0.0, 5.0);
-mlpack::data::Save("range.txt", "range", r);
-
-// Load into new range.
-mlpack::math::Range newRange;
-mlpack::data::Load("range.txt", "range", newRange);
-\endcode
-
-It is important to be sure that you load the appropriate type; if you save, for
-instance, an mlpack::regression::LogisticRegression object and attempt to load
-it as an mlpack::math::Range object, the load will fail and an exception will be
-thrown.  (When the object is saved as binary (.bin), it is possible that the
-load will not fail, but instead load with mangled data, which is perhaps even
-worse!)
-
-@section formatfinal Final notes
-
-If the examples here are unclear, it would be worth looking into the ways that
-mlpack::data::Load() and mlpack::data::Save() are used in the code.  Some
-example files that may be useful to this end:
-
- - src/mlpack/methods/logistic_regression/logistic_regression_main.cpp
- - src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
- - src/mlpack/methods/neighbor_search/knn_main.cpp
-
-If you are interested in adding support for more data types to mlpack, it would
-be preferable to add the support upstream to Armadillo instead; then very
-little code modification to mlpack will be necessary.
-
-*/
diff -pruN 3.4.2-7/doc/guide/go_quickstart.hpp 4.0.1-1/doc/guide/go_quickstart.hpp
--- 3.4.2-7/doc/guide/go_quickstart.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/go_quickstart.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,230 +0,0 @@
-/**
- * @file go_quickstart.hpp
- * @author Yashwant Singh Parihar
-
-@page go_quickstart mlpack in Go quickstart guide
-
-@section go_quickstart_intro Introduction
-
-This page describes how you can quickly get started using mlpack from Go and
-gives a few examples of usage, and pointers to deeper documentation.
-
-This quickstart guide is also available for @ref python_quickstart "Python",
-@ref cli_quickstart "the command-line", @ref julia_quickstart "Julia" and
-@ref r_quickstart "R".
-
-@section go_quickstart_install Installing mlpack
-
-Installing the mlpack bindings for Go is somewhat time-consuming as the library
-must be built; you can run the following code:
-
-@code{.sh}
-go get -u -d mlpack.org/v1/mlpack
-cd ${GOPATH}/src/mlpack.org/v1/mlpack
-make install
-@endcode
-
-Building the Go bindings from scratch is a little more in-depth, though.  For
-information on that, follow the instructions on the @ref build page, and be sure
-to specify @c -DBUILD_GO_BINDINGS=ON to CMake.
-
-@section go_quickstart_example Simple mlpack quickstart example
-
-As a really simple example of how to use mlpack from Go, let's do some
-simple classification on a subset of the standard machine learning @c covertype
-dataset.  We'll first split the dataset into a training set and a testing set,
-then we'll train an mlpack random forest on the training data, and finally we'll
-print the accuracy of the random forest on the test dataset.
-
-You can copy-paste this code directly into main.go to run it.
-@code{.go}
-package main
-
-import (
-  "mlpack.org/v1/mlpack"
-  "fmt"
-)
-func main() {
-
-  // Download dataset.
-  mlpack.DownloadFile("https://www.mlpack.org/datasets/covertype-small.data.csv.gz",
-                      "data.csv.gz")
-  mlpack.DownloadFile("https://www.mlpack.org/datasets/covertype-small.labels.csv.gz",
-                      "labels.csv.gz")
-
-  // Extract/Unzip the dataset.
-  mlpack.UnZip("data.csv.gz", "data.csv")
-  dataset, _ := mlpack.Load("data.csv")
-
-  mlpack.UnZip("labels.csv.gz", "labels.csv")
-  labels, _ := mlpack.Load("labels.csv")
-
-  // Split the dataset using mlpack.
-  params := mlpack.PreprocessSplitOptions()
-  params.InputLabels = labels
-  params.TestRatio = 0.3
-  params.Verbose = true
-  test, test_labels, train, train_labels :=
-      mlpack.PreprocessSplit(dataset, params)
-
-  // Train a random forest.
-  rf_params := mlpack.RandomForestOptions()
-  rf_params.NumTrees = 10
-  rf_params.MinimumLeafSize = 3
-  rf_params.PrintTrainingAccuracy = true
-  rf_params.Training = train
-  rf_params.Labels = train_labels
-  rf_params.Verbose = true
-  rf_model, _, _ := mlpack.RandomForest(rf_params)
-
-  // Predict the labels of the test points.
-  rf_params_2 := mlpack.RandomForestOptions()
-  rf_params_2.Test = test
-  rf_params_2.InputModel = &rf_model
-  rf_params_2.Verbose = true
-  _, predictions, _ := mlpack.RandomForest(rf_params_2)
-
-  // Now print the accuracy.
-  rows, _ := predictions.Dims()
-  var sum int = 0
-  for i := 0; i < rows; i++ {
-    if (predictions.At(i, 0) == test_labels.At(i, 0)) {
-      sum = sum + 1
-    }
-  }
-  fmt.Print(sum, " correct out of ", rows, " (",
-      (float64(sum) / float64(rows)) * 100, "%).\n")
-}
-@endcode
-
-We can see that we achieve reasonably good accuracy on the test dataset (80%+);
-if we use the full @c covertype.csv.gz, the accuracy should increase
-significantly (but training will take longer).
-
-It's easy to modify the code above to do more complex things, or to use
-different mlpack learners, or to interface with other machine learning toolkits.
-
-@section go_quickstart_whatelse What else does mlpack implement?
-
-The example above has only shown a little bit of the functionality of mlpack.
-Lots of other commands are available with different functionality.  A full list
-of each of these commands and full documentation can be found on the following
-page:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/go_documentation.html">Go documentation</a>
-
-You can also use the GoDoc to explore the @c mlpack module and its
-functions; every function comes with comprehensive documentation.
-
-For more information on what mlpack does, see https://www.mlpack.org/.
-Next, let's go through another example for providing movie recommendations with
-mlpack.
-
-@section go_quickstart_movierecs Using mlpack for movie recommendations
-
-In this example, we'll train a collaborative filtering model using mlpack's
-<tt><a href="https://www.mlpack.org/doc/mlpack-git/go_documentation.html#cf">Cf()</a></tt> method.  We'll train this on the MovieLens dataset from
-https://grouplens.org/datasets/movielens/, and then we'll use the model that we
-train to give recommendations.
-
-You can copy-paste this code directly into main.go to run it.
-
-@code{.go}
-package main
-
-import (
-  "github.com/frictionlessdata/tableschema-go/csv"
-  "mlpack.org/v1/mlpack"
-  "gonum.org/v1/gonum/mat"
-  "fmt"
-)
-func main() {
-
-  // Download dataset.
-  mlpack.DownloadFile("https://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz",
-                      "ratings-only.csv.gz")
-  mlpack.DownloadFile("https://www.mlpack.org/datasets/ml-20m/movies.csv.gz",
-                      "movies.csv.gz")
-
-  // Extract dataset.
-  mlpack.UnZip("ratings-only.csv.gz", "ratings-only.csv")
-  ratings, _ := mlpack.Load("ratings-only.csv")
-
-  mlpack.UnZip("movies.csv.gz", "movies.csv")
-  table, _ := csv.NewTable(csv.FromFile("movies.csv"), csv.LoadHeaders())
-  movies, _ := table.ReadColumn("title")
-
-  // Split the dataset using mlpack.
-  params := mlpack.PreprocessSplitOptions()
-  params.TestRatio = 0.1
-  params.Verbose = true
-  ratings_test, _, ratings_train, _ := mlpack.PreprocessSplit(ratings, params)
-
-  // Train the model.  Change the rank to increase/decrease the complexity of the
-  // model.
-  cf_params := mlpack.CfOptions()
-  cf_params.Training = ratings_train
-  cf_params.Test = ratings_test
-  cf_params.Rank = 10
-  cf_params.Verbose = true
-  cf_params.Algorithm = "RegSVD"
-  _, cf_model := mlpack.Cf(cf_params)
-
-  // Now query the top 10 movies for user 1.
-  cf_params_2 := mlpack.CfOptions()
-  cf_params_2.InputModel = &cf_model
-  cf_params_2.Recommendations = 10
-  cf_params_2.Query = mat.NewDense(1, 1, []float64{1})
-  cf_params_2.Verbose = true
-  cf_params_2.MaxIterations = 10
-  output, _ := mlpack.Cf(cf_params_2)
-
-  // Get the names of the movies for user 1.
-  fmt.Println("Recommendations for user 1")
-  for i := 0; i < 10; i++ {
-    fmt.Println(i, ":", movies[int(output.At(0, i))])
-  }
-}
-@endcode
-
-Here is some example output, showing that user 1 seems to have good taste in
-movies:
-
-@code{.unparsed}
-Recommendations for user 1:
-  0: Casablanca (1942)
-  1: Pan's Labyrinth (Laberinto del fauno, El) (2006)
-  2: Godfather, The (1972)
-  3: Answer This! (2010)
-  4: Life Is Beautiful (La Vita è bella) (1997)
-  5: Adventures of Tintin, The (2011)
-  6: Dark Knight, The (2008)
-  7: Out for Justice (1991)
-  8: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
-  9: Schindler's List (1993)
-@endcode
-
-@section go_quickstart_nextsteps Next steps with mlpack
-
-Now that you have done some simple work with mlpack, you have seen how it can
-easily plug into a data science workflow in Go.  A great thing to do next
-would be to look at more documentation for the Go mlpack bindings:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/go_documentation.html">Go mlpack
-   binding documentation</a>
-
-Also, mlpack is much more flexible when used from C++ and allows much greater
-functionality.  So, more complicated tasks are possible if you are willing to
-write C++.  To get started learning about mlpack in C++, the following resources
-might be helpful:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/tutorials.html">mlpack
-   C++ tutorials</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/build.html">mlpack
-   build and installation guide</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/sample.html">Simple
-   sample C++ mlpack programs</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/index.html">mlpack
-   Doxygen documentation homepage</a>
-
- */
diff -pruN 3.4.2-7/doc/guide/hpt.hpp 4.0.1-1/doc/guide/hpt.hpp
--- 3.4.2-7/doc/guide/hpt.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/hpt.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,238 +0,0 @@
-namespace mlpack {
-namespace hpt {
-
-/*! @page hpt_guide Hyper-Parameter Tuning
-
-@section hptintro Introduction
-
-\b mlpack implements a generic hyperparameter tuner that is able to tune both
-continuous and discrete parameters of various different algorithms.  This is an
-important task---the performance of many machine learning algorithms can be
-highly dependent on the hyperparameters that are chosen for that algorithm.
-(One example: the choice of \f$k\f$ for a \f$k\f$-nearest-neighbors classifier.)
-
-This hyper-parameter tuner is built on the same general concept as the
-cross-validation classes (see the @ref cv "cross-validation tutorial"): given
-some machine learning algorithm, some data, some performance measure, and a set
-of hyperparameters, attempt to find the hyperparameter set that best optimizes
-the performance measure on the given data with the given algorithm.
-
-\b mlpack's implementation of hyperparameter tuning is flexible, and is built in
-a way that supports many algorithms and many optimizers.  At the time of this
-writing, complex hyperparameter optimization techniques are not available, but
-the hyperparameter tuner does support these, should they be implemented in the
-future.
-
-In this tutorial we will see usage examples of the hyper-parameter tuning
-module, as well as more details about the \c HyperParameterTuner class.
-
-@section hptbasic Basic Usage
-
-The interface of the hyper-parameter tuning module is quite similar to the
-interface of the @ref cv "cross-validation module". To construct a \c
-HyperParameterTuner object you need to specify as template parameters what
-machine learning algorithm, cross-validation strategy, performance measure, and
-optimization strategy (\c ens::GridSearch will be used by
-default) you are going to use.  Then, you must pass the same arguments as for
-the cross-validation classes: the data and labels (or responses) to use are
-given to the constructor, and the possible hyperparameter values are given to
-the \c HyperParameterTuner::Optimize() method, which returns the best
-algorithm configuration as a \c std::tuple<>.
-
-Let's see some examples.
-
-Suppose we have the following data to train and validate on.
-@code
-  // 100-point 5-dimensional random dataset.
-  arma::mat data = arma::randu<arma::mat>(5, 100);
-  // Noisy responses retrieved by a random linear transformation of data.
-  arma::rowvec responses = arma::randu<arma::rowvec>(5) * data +
-      0.1 * arma::randn<arma::rowvec>(100);
-@endcode
-
-Given the dataset above, we can use the following code to try to find a good \c
-lambda value for \ref regression::LinearRegression "LinearRegression".  Here we
-use \ref cv::SimpleCV "SimpleCV" instead of k-fold cross-validation to save
-computation time.
-
-@code
-  // Using 80% of data for training and remaining 20% for assessing MSE.
-  double validationSize = 0.2;
-  HyperParameterTuner<LinearRegression, MSE, SimpleCV> hpt(validationSize,
-      data, responses);
-
-  // Finding a good value for lambda from the discrete set of values 0.0, 0.001,
-  // 0.01, 0.1, and 1.0.
-  arma::vec lambdas{0.0, 0.001, 0.01, 0.1, 1.0};
-  double bestLambda;
-  std::tie(bestLambda) = hpt.Optimize(lambdas);
-@endcode
-
-In this example we have used \c ens::GridSearch (the
-default optimizer) to find a good value for the \c lambda hyper-parameter.  For
-that we have specified what values should be tried.
-
-@section hptfixed Fixed Arguments
-
-When some hyper-parameters should not be optimized, you can specify values
-for them with the \c Fixed() method as in the following example of trying to
-find good \c lambda1 and \c lambda2 values for \ref regression::LARS "LARS"
-(least-angle regression).
-
-@code
-  HyperParameterTuner<LARS, MSE, SimpleCV> hpt2(validationSize, data,
-      responses);
-
-  // The hyper-parameter tuner should not try to change the transposeData or
-  // useCholesky parameters.
-  bool transposeData = true;
-  bool useCholesky = false;
-
-  // We wish only to search for the best lambda1 and lambda2 values.
-  arma::vec lambda1Set{0.0, 0.001, 0.01, 0.1, 1.0};
-  arma::vec lambda2Set{0.0, 0.002, 0.02, 0.2, 2.0};
-
-  double bestLambda1, bestLambda2;
-  std::tie(bestLambda1, bestLambda2) = hpt2.Optimize(Fixed(transposeData),
-      Fixed(useCholesky), lambda1Set, lambda2Set);
-@endcode
-
-Note that for the call to \c hpt2.Optimize(), we have used the same order of
-arguments as they appear in the corresponding \ref regression::LARS "LARS"
-constructor:
-
-@code
-  LARS(const arma::mat& data,
-       const arma::rowvec& responses,
-       const bool transposeData = true,
-       const bool useCholesky = false,
-       const double lambda1 = 0.0,
-       const double lambda2 = 0.0,
-       const double tolerance = 1e-16);
-@endcode
-
-@section hptgradient Gradient-Based Optimization
-
-In some cases we may wish to optimize a hyperparameter over the space of all
-possible real values, instead of providing a grid in which to search.
-Alternately, we may know approximately optimal values from a grid search for
-real-valued hyperparameters, but wish to further tune those values.
-
-In this case, we can use a gradient-based optimizer for hyperparameter search.
-In the following example, we try to optimize the \c lambda1 and \c lambda2
-hyper-parameters for \ref regression::LARS "LARS" with the
-\c ens::GradientDescent optimizer.
-
-@code
-  HyperParameterTuner<LARS, MSE, SimpleCV, GradientDescent> hpt3(validationSize,
-      data, responses);
-
-  // GradientDescent can be adjusted in the following way.
-  hpt3.Optimizer().StepSize() = 0.1;
-  hpt3.Optimizer().Tolerance() = 1e-15;
-
-  // We can set up values used for calculating gradients.
-  hpt3.RelativeDelta() = 0.01;
-  hpt3.MinDelta() = 1e-10;
-
-  double initialLambda1 = 0.001;
-  double initialLambda2 = 0.002;
-
-  double bestGDLambda1, bestGDLambda2;
-  std::tie(bestGDLambda1, bestGDLambda2) = hpt3.Optimize(Fixed(transposeData),
-      Fixed(useCholesky), initialLambda1, initialLambda2);
-@endcode
-
-@section hpt_class The HyperParameterTuner class
-
-The \c HyperParameterTuner class is very similar to the
-\ref cv::KFoldCV "KFoldCV" and \ref cv::SimpleCV "SimpleCV" classes (see the
-\ref cv "cross-validation tutorial" for more information on those two classes), but
-there are a few important differences.
-
-First, the \c HyperParameterTuner accepts five different template parameters;
-only the first three of these are required:
-
-  - \c MLAlgorithm This is the algorithm to be used.
-  - \c Metric This is the performance measure to be used; see
-        @ref cvbasic_metrics for more information.
-  - \c CVType This is the type of cross-validation to be used for evaluating the
-        performance measure; this should be \ref cv::KFoldCV "KFoldCV" or
-        \ref cv::SimpleCV "SimpleCV".
-  - \c OptimizerType This is the type of optimizer to use; it can be
-        \c GridSearch or a gradient-based optimizer.
-  - \c MatType This is the type of data matrix to use.  The default is
-        \c arma::mat.  This only needs to be changed if you are specifically
-        using sparse data, or if you want to use a numeric type other than
-        \c double.
-
-The last two template parameters are automatically inferred by the
-\c HyperParameterTuner and should not need to be manually specified, unless an
-unconventional data type like \c arma::fmat is being used for data points.
-
-Typically, \ref cv::SimpleCV "SimpleCV" is a good choice for \c CVType because
-it takes so much less time to compute than full \ref cv::KFoldCV "KFoldCV";
-however, the disadvantage is that \ref cv::SimpleCV "SimpleCV" might give a
-somewhat more noisy estimate of the performance measure on unseen test data.
-
-The constructor for the \c HyperParameterTuner is called with exactly the same
-arguments as the corresponding \c CVType that has been chosen.  For more
-information on that, please see the
-@ref cvbasic_api "cross-validation constructor tutorial".  As an example, if we
-are using \ref cv::SimpleCV "SimpleCV" and wish to hold out 20\% of the dataset
-as a validation set, we might construct a \c HyperParameterTuner like this:
-
-@code
-// We will use LinearRegression as the MLAlgorithm, and MSE as the performance
-// measure.  Our dataset is 'dataset' and the responses are 'responses'.
-HyperParameterTuner<LinearRegression, MSE, SimpleCV> hpt(0.2, dataset,
-    responses);
-@endcode
-
-Next, we must set up the hyperparameters to be optimized.  If we are doing a
-grid search with the \c ens::GridSearch optimizer (the
-default), then we only need to pass a `std::vector` (for non-numeric
-hyperparameters) or an `arma::vec` (for numeric hyperparameters) containing all
-of the possible choices that we wish to search over.
-
-For instance, a set of numeric values might be chosen like this, for the
-\c lambda parameter (of type \c double):
-
-@code
-arma::vec lambdaSet = arma::vec("0.0 0.1 0.5 1.0");
-@endcode
-
-Similarly, a set of non-numeric values might be chosen like this, for the
-\c intercept parameter:
-
-@code
-std::vector<bool> interceptSet = { false, true };
-@endcode
-
-Once all of these are set up, the \c HyperParameterTuner::Optimize() method may
-be called to find the best set of hyperparameters:
-
-@code
-bool intercept;
-double lambda;
-std::tie(lambda, intercept) = hpt.Optimize(lambdaSet, interceptSet);
-@endcode
-
-Alternately, the \c Fixed() method (detailed in the @ref hptfixed
-"Fixed arguments" section) can be used to fix the values of some parameters.
-
-For continuous optimizers like
-\c ens::GradientDescent, a range does not need to
-be specified but instead only a single value.  See the
-\ref hptgradient "Gradient-Based Optimization" section for more details.
-
-@section hptfurther Further documentation
-
-For more information on the \c HyperParameterTuner class, see the
-mlpack::hpt::HyperParameterTuner class documentation and the
-@ref cv "cross-validation tutorial".
-
-*/
-
-} // namespace hpt
-} // namespace mlpack
diff -pruN 3.4.2-7/doc/guide/iodoc.hpp 4.0.1-1/doc/guide/iodoc.hpp
--- 3.4.2-7/doc/guide/iodoc.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/iodoc.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,215 +0,0 @@
-/*! @page iodoc Writing an mlpack binding
-
-@section iointro Introduction
-
-This tutorial gives some simple examples of how to write an mlpack binding that
-can be compiled for multiple languages.  These bindings make up the core of how
-most users will interact with mlpack.
-
-mlpack provides the following:
-
- - mlpack::Log, for debugging / informational / warning / fatal output
- - mlpack::IO, for parsing command line options or other options
-
-Each of those classes is well-documented, and that documentation should be
-consulted for further reference.
-
-First, we'll discuss the logging infrastructure, which is useful for giving
-output that users can see.
-
-@section simplelog Simple Logging Example
-
-mlpack has four logging levels:
-
- - Log::Debug
- - Log::Info
- - Log::Warn
- - Log::Fatal
-
-Output to Log::Debug does not show (and has no performance penalty) when mlpack
-is compiled without debugging symbols.  Output to Log::Info is only shown when
-the program is run with the \c --verbose (or \c -v) flag.  Log::Warn is always
-shown, and Log::Fatal will throw a std::runtime_error exception, after a newline
-is sent to it. If mlpack was compiled with debugging symbols, Log::Fatal will
-also print a backtrace, if the necessary libraries are available.
-
-Here is a simple example binding, and its output.  Note that instead of
-\c int \c main(), we use \c static \c void \c mlpackMain().  This is because the
-automatic binding generator (see \ref bindings) will set up the environment and
-once that is done, it will call \c mlpackMain().
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/core/util/io.hpp>
-// This definition below means we will only compile for the command line.
-#define BINDING_TYPE BINDING_TYPE_CLI
-#include <mlpack/core/util/mlpack_main.hpp>
-
-using namespace mlpack;
-
-static void mlpackMain()
-{
-  Log::Debug << "Compiled with debugging symbols." << std::endl;
-
-  Log::Info << "Some test informational output." << std::endl;
-
-  Log::Warn << "A warning!" << std::endl;
-
-  Log::Fatal << "Program has crashed." << std::endl;
-
-  Log::Warn << "Made it!" << std::endl;
-}
-@endcode
-
-Assuming mlpack is installed on the system and the code above is saved in
-\c test.cpp, this program can be compiled with the following command:
-
-@code
-$ g++ -o test test.cpp -DDEBUG -g -rdynamic -lmlpack
-@endcode
-
-Since we compiled with \c -DDEBUG, if we run the program as below, the following
-output is shown:
-
-@code
-$ ./test --verbose
-[DEBUG] Compiled with debugging symbols.
-[INFO ] Some test informational output.
-[WARN ] A warning!
-[FATAL] [bt]: (1) /absolute/path/to/file/test.cpp:6: function()
-[FATAL] Program has crashed.
-terminate called after throwing an instance of 'std::runtime_error'
-  what():  fatal error; see Log::Fatal output
-Aborted
-@endcode
-
-The flags \c -g and \c -rdynamic are only necessary for providing a backtrace.
-If those flags are not given during compilation, the following output would be
-shown:
-
-@code
-$ ./test --verbose
-[DEBUG] Compiled with debugging symbols.
-[INFO ] Some test informational output.
-[WARN ] A warning!
-[FATAL] Cannot give backtrace because program was compiled without: -g -rdynamic
-[FATAL] For a backtrace, recompile with: -g -rdynamic.
-[FATAL] Program has crashed.
-terminate called after throwing an instance of 'std::runtime_error'
-  what():  fatal error; see Log::Fatal output
-Aborted
-@endcode
-
-The last warning is not reached, because Log::Fatal terminates the program.
-
-Without debugging symbols (i.e. without \c -g and \c -DDEBUG) and without
---verbose, the following is shown:
-
-@code
-$ ./test
-[WARN ] A warning!
-[FATAL] Program has crashed.
-terminate called after throwing an instance of 'std::runtime_error'
-  what():  fatal error; see Log::Fatal output
-Aborted
-@endcode
-
-These four outputs can be very useful for both providing informational output
-and debugging output for your mlpack program.
-
-@section simpleio Simple IO Example
-
-Through the mlpack::IO object, command-line parameters can be easily added
-with the BINDING_NAME, BINDING_SHORT_DESC, BINDING_LONG_DESC, BINDING_EXAMPLE,
-BINDING_SEE_ALSO, PARAM_INT, PARAM_DOUBLE, PARAM_STRING, and PARAM_FLAG
-macros.
-
-Here is a sample use of those macros, extracted from methods/pca/pca_main.cpp.
-(Some details have been omitted from the snippet below.)
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/core/util/io.hpp>
-#include <mlpack/core/util/mlpack_main.hpp>
-
-// Program Name.
-BINDING_NAME("Principal Components Analysis");
-
-// Short description.
-BINDING_SHORT_DESC(
-    "An implementation of several strategies for principal components analysis "
-    "(PCA), a common preprocessing step.  Given a dataset and a desired new "
-    "dimensionality, this can reduce the dimensionality of the data using the "
-    "linear transformation determined by PCA.");
-
-// Long description.
-BINDING_LONG_DESC(
-    "This program performs principal components analysis on the given dataset "
-    "using the exact, randomized, randomized block Krylov, or QUIC SVD method. "
-    "It will transform the data onto its principal components, optionally "
-    "performing dimensionality reduction by ignoring the principal components "
-    "with the smallest eigenvalues.");
-
-// See also...
-BINDING_SEE_ALSO("Principal component analysis on Wikipedia",
-        "https://en.wikipedia.org/wiki/Principal_component_analysis");
-BINDING_SEE_ALSO("mlpack::pca::PCA C++ class documentation",
-        "@doxygen/classmlpack_1_1pca_1_1PCA.html"));
-
-// Parameters for program.
-PARAM_MATRIX_IN_REQ("input", "Input dataset to perform PCA on.", "i");
-PARAM_MATRIX_OUT("output", "Matrix to save modified dataset to.", "o");
-PARAM_INT_IN("new_dimensionality", "Desired dimensionality of output dataset.",
-    "d", 0);
-
-using namespace mlpack;
-
-static void mlpackMain()
-{
-  // Load input dataset.
-  arma::mat& dataset = IO::GetParam<arma::mat>("input");
-
-  size_t newDimension = IO::GetParam<int>("new_dimensionality");
-
-  ...
-
-  // Now save the results.
-  if (IO::HasParam("output"))
-    IO::GetParam<arma::mat>("output") = std::move(dataset);
-}
-@endcode
-
-Documentation is automatically generated using those macros, and when the
-program is run with --help the following is displayed:
-
-@code
-$ mlpack_pca --help
-Principal Components Analysis
-
-  This program performs principal components analysis on the given dataset.  It
-  will transform the data onto its principal components, optionally performing
-  dimensionality reduction by ignoring the principal components with the
-  smallest eigenvalues.
-
-Required options:
-
-  --input_file [string]         Input dataset to perform PCA on.
-  --output_file [string]        Matrix to save modified dataset to.
-
-Options:
-
-  --help (-h)                   Default help info.
-  --info [string]               Get help on a specific module or option.
-                                Default value ''.
-  --new_dimensionality [int]    Desired dimensionality of output dataset.
-                                Default value 0.
-  --verbose (-v)                Display informational messages and the full list
-                                of parameters and timers at the end of
-                                execution.
-@endcode
-
-The mlpack::IO documentation can be consulted for further and complete
-documentation.  Also useful is to look at other example bindings, found in
-\c src/mlpack/methods/.
-
-*/
diff -pruN 3.4.2-7/doc/guide/julia_quickstart.hpp 4.0.1-1/doc/guide/julia_quickstart.hpp
--- 3.4.2-7/doc/guide/julia_quickstart.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/julia_quickstart.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,192 +0,0 @@
-/**
- * @file julia_quickstart.hpp
- * @author Ryan Curtin
-
-@page julia_quickstart mlpack in Julia quickstart guide
-
-@section julia_quickstart_intro Introduction
-
-This page describes how you can quickly get started using mlpack from Julia and
-gives a few examples of usage, and pointers to deeper documentation.
-
-This quickstart guide is also available for @ref python_quickstart "Python",
-@ref cli_quickstart "the command-line", @ref go_quickstart "Go" and
-@ref r_quickstart "R".
-
-@section julia_quickstart_install Installing mlpack
-
-Installing the mlpack bindings for Julia is straightforward; you can just use
-@c Pkg:
-
-@code{.julia}
-using Pkg
-Pkg.add("mlpack")
-@endcode
-
-Building the Julia bindings from scratch is a little more in-depth, though.  For
-information on that, follow the instructions on the @ref build page, and be sure
-to specify @c -DBUILD_JULIA_BINDINGS=ON to CMake; you may need to also set the
-location of the Julia program with @c -DJULIA_EXECUTABLE=/path/to/julia.
-
-@section julia_quickstart_example Simple mlpack quickstart example
-
-As a really simple example of how to use mlpack from Julia, let's do some
-simple classification on a subset of the standard machine learning @c covertype
-dataset.  We'll first split the dataset into a training set and a testing set,
-then we'll train an mlpack random forest on the training data, and finally we'll
-print the accuracy of the random forest on the test dataset.
-
-You can copy-paste this code directly into Julia to run it.  You may need to add
-some extra packages with, e.g., `using Pkg; Pkg.add("CSV");
-Pkg.add("DataFrames"); Pkg.add("Libz")`.
-
-@code{.julia}
-using CSV
-using DataFrames
-using Libz
-using mlpack
-
-# Load the dataset from an online URL.  Replace with 'covertype.csv.gz' if you
-# want to use the full dataset.
-df = CSV.read(ZlibInflateInputStream(open(download(
-        "http://www.mlpack.org/datasets/covertype-small.csv.gz"))))
-
-# Split the labels.
-labels = df[!, :label][:]
-dataset = select!(df, Not(:label))
-
-# Split the dataset using mlpack.
-test, test_labels, train, train_labels = mlpack.preprocess_split(
-    dataset,
-    input_labels=labels,
-    test_ratio=0.3)
-
-# Train a random forest.
-rf_model, _, _ = mlpack.random_forest(training=train,
-                              labels=train_labels,
-                              print_training_accuracy=true,
-                              num_trees=10,
-                              minimum_leaf_size=3)
-
-# Predict the labels of the test points.
-_, predictions, _ = mlpack.random_forest(input_model=rf_model,
-                                         test=test)
-
-# Now print the accuracy.  The third return value ('probabilities'), which we
-# ignored here, could also be used to generate an ROC curve.
-correct = sum(predictions .== test_labels)
-print("$(correct) out of $(length(test_labels)) test points correct " *
-    "($(correct / length(test_labels) * 100.0)%).\n")
-@endcode
-
-We can see that we achieve reasonably good accuracy on the test dataset (80%+);
-if we use the full @c covertype.csv.gz, the accuracy should increase
-significantly (but training will take longer).
-
-It's easy to modify the code above to do more complex things, or to use
-different mlpack learners, or to interface with other machine learning toolkits.
-
-@section julia_quickstart_whatelse What else does mlpack implement?
-
-The example above has only shown a little bit of the functionality of mlpack.
-Lots of other commands are available with different functionality.  A full list
-of each of these commands and full documentation can be found on the following
-page:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/julia_documentation.html">Julia documentation</a>
-
-You can also use the Julia REPL to explore the @c mlpack module and its
-functions; every function comes with comprehensive documentation.
-
-For more information on what mlpack does, see https://www.mlpack.org/.
-Next, let's go through another example of providing movie recommendations with
-mlpack.
-
-@section julia_quickstart_movierecs Using mlpack for movie recommendations
-
-In this example, we'll train a collaborative filtering model using mlpack's
-<tt><a href="https://www.mlpack.org/doc/mlpack-git/julia_documentation.html#cf">cf()</a></tt> method.  We'll train this on the MovieLens dataset from
-https://grouplens.org/datasets/movielens/, and then we'll use the model that we
-train to give recommendations.
-
-You can copy-paste this code directly into Julia to run it.
-
-@code{.julia}
-using CSV
-using mlpack
-using Libz
-using DataFrames
-
-# First, load the MovieLens dataset.  This is taken from files.grouplens.org/
-# but reposted on mlpack.org as unpacked and slightly preprocessed data.
-ratings = CSV.read(ZlibInflateInputStream(open(download(
-        "http://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz"))))
-movies = CSV.read(ZlibInflateInputStream(open(download(
-        "http://www.mlpack.org/datasets/ml-20m/movies.csv.gz"))))
-
-# Hold out 10% of the dataset into a test set so we can evaluate performance.
-ratings_test, _, ratings_train, _ = mlpack.preprocess_split(ratings;
-    test_ratio=0.1, verbose=true)
-
-# Train the model.  Change the rank to increase/decrease the complexity of the
-# model.
-_, cf_model = mlpack.cf(training=ratings_train,
-                        test=ratings_test,
-                        rank=10,
-                        verbose=true,
-                        algorithm="RegSVD")
-
-# Now query the top 10 movies for user 1.
-output, _ = mlpack.cf(input_model=cf_model,
-                      query=[1],
-                      recommendations=10,
-                      verbose=true,
-                      max_iterations=10)
-
-print("Recommendations for user 1:\n")
-for i in 1:10
-  print("  $(i): $(movies[output[i], :][3])\n")
-end
-@endcode
-
-Here is some example output, showing that user 1 seems to have good taste in
-movies:
-
-@code{.unparsed}
-Recommendations for user 1:
-  1: Casablanca (1942)
-  2: Pan's Labyrinth (Laberinto del fauno, El) (2006)
-  3: Godfather, The (1972)
-  4: Answer This! (2010)
-  5: Life Is Beautiful (La Vita è bella) (1997)
-  6: Adventures of Tintin, The (2011)
-  7: Dark Knight, The (2008)
-  8: Out for Justice (1991)
-  9: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
-  10: Schindler's List (1993)
-@endcode
-
-@section julia_quickstart_nextsteps Next steps with mlpack
-
-Now that you have done some simple work with mlpack, you have seen how it can
-easily plug into a data science workflow in Julia.  A great thing to do next
-would be to look at more documentation for the Julia mlpack bindings:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/julia_documentation.html">Julia mlpack
-   binding documentation</a>
-
-Also, mlpack is much more flexible when used directly from C++, and allows much
-greater functionality.  So, more complicated tasks are possible if you are
-willing to write C++ (or perhaps CxxWrap.jl).  To get started learning about
-mlpack in C++, the following resources might be helpful:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/tutorials.html">mlpack
-   C++ tutorials</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/build.html">mlpack
-   build and installation guide</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/sample.html">Simple
-   sample C++ mlpack programs</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/index.html">mlpack
-   Doxygen documentation homepage</a>
-
- */
diff -pruN 3.4.2-7/doc/guide/matrices.hpp 4.0.1-1/doc/guide/matrices.hpp
--- 3.4.2-7/doc/guide/matrices.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/matrices.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,75 +0,0 @@
-/*! @page matrices Matrices in mlpack
-
-@section matintro Introduction
-
-mlpack uses Armadillo matrices for matrix support.  Armadillo is a fast C++
-matrix library which uses advanced template techniques (such as expression
-templates) to avoid unnecessary temporaries and provide fast matrix operations.
-
-Documentation on Armadillo can be found on their website:
-
-http://arma.sourceforge.net/docs.html
-
-Nonetheless, there are a few further caveats for mlpack Armadillo usage.
-
-@section format Column-major Matrices
-
-Armadillo matrices are stored in a column-major format; this means that each
-column is stored contiguously in memory.
-
-This means that, for the vast majority of machine learning methods, it is faster
-to store observations as columns and dimensions as rows.  This is counter to
-most standard machine learning texts!
-
-This has major implications for linear algebra.  For instance, the covariance
-of a matrix is typically
-
-@f[
-C = X^T X
-@f]
-
-but for a column-wise matrix, it is
-
-@f[
-C = X X^T
-@f]
-
-and this is very important to keep in mind!  If your mlpack code is not working,
-this may be a factor in why.
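-
-As a minimal sketch (assuming @c X is a column-major data matrix, and ignoring
-the usual normalization term), the two conventions look like this in Armadillo:
-
-@code
-// X has one observation (point) per column.
-arma::mat X(10, 1000, arma::fill::randu);
-
-// Column-major (mlpack) convention: C = X * X^T.
-arma::mat C = X * X.t();
-
-// If observations were rows instead, it would be C = X.t() * X.
-@endcode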
-
-@section loading Loading Matrices
-
-mlpack provides data::Load() and data::Save() functions, which should be used
-instead of Armadillo's loading and saving functions.
-
-Most machine learning data is stored in row-major format; a CSV, for example,
-will generally have one observation per line and each column will correspond to
-a dimension.
-
-The data::Load() and data::Save() functions transpose the matrix upon loading,
-meaning that the following CSV:
-
-@code
-$ cat data.csv
-3,3,3,3,0
-3,4,4,3,0
-3,4,4,3,0
-3,3,4,3,0
-3,6,4,3,0
-2,4,4,3,0
-2,4,4,1,0
-3,3,3,2,0
-3,4,4,2,0
-3,4,4,2,0
-3,3,4,2,0
-3,6,4,2,0
-2,4,4,2,0
-@endcode
-
-is actually loaded with 5 rows and 13 columns, not 13 rows and 5 columns like
-the CSV is written.  More information on mlpack's loading functionality can be
-found in \ref formatdoc.
-
-This is important to remember!
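-
-As a minimal sketch (assuming the file above is saved as @c data.csv), the
-transposition can be checked directly:
-
-@code
-#include <mlpack/core.hpp>
-#include <iostream>
-
-int main()
-{
-  arma::mat dataset;
-  mlpack::data::Load("data.csv", dataset, true);
-
-  // Prints "5 x 13": dimensions are rows, observations are columns.
-  std::cout << dataset.n_rows << " x " << dataset.n_cols << std::endl;
-}
-@endcode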
-
-*/
diff -pruN 3.4.2-7/doc/guide/python_quickstart.hpp 4.0.1-1/doc/guide/python_quickstart.hpp
--- 3.4.2-7/doc/guide/python_quickstart.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/python_quickstart.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,215 +0,0 @@
-/**
- * @file python_quickstart.hpp
- * @author Ryan Curtin
-
-@page python_quickstart mlpack in Python quickstart guide
-
-@section python_quickstart_intro Introduction
-
-This page describes how you can quickly get started using mlpack from Python,
-gives a few examples of usage, and points to deeper documentation.
-
-This quickstart guide is also available for 
-@ref cli_quickstart "the command-line" and @ref julia_quickstart "Julia".
-
-@section python_quickstart_install Installing mlpack
-
-Installing the mlpack bindings for Python is straightforward.  It's easy to use
-conda or pip to do this:
-
-@code{.sh}
-pip install mlpack
-@endcode
-
-@code{.sh}
-conda install -c conda-forge mlpack
-@endcode
-
-Otherwise, we can build the Python bindings from scratch, as follows.  First we
-have to install the dependencies (the code below is for Ubuntu), then we can
-build and install mlpack.  You can copy-paste the commands into your shell.
-
-@code{.sh}
-sudo apt-get install libboost-all-dev g++ cmake libarmadillo-dev python-pip wget
-sudo pip install cython setuptools numpy pandas
-wget https://www.mlpack.org/files/mlpack-3.4.2.tar.gz
-tar -xvzpf mlpack-3.4.2.tar.gz
-mkdir -p mlpack-3.4.2/build/ && cd mlpack-3.4.2/build/
-cmake ../ && make -j4 && sudo make install
-@endcode
-
-More information on the build process and details can be found on the @ref build
-page.  You may also need to set the environment variable @c LD_LIBRARY_PATH to
-include @c /usr/local/lib/ on most Linux systems.
-
-@code
-export LD_LIBRARY_PATH=/usr/local/lib/
-@endcode
-
-You can also use the mlpack Docker image on Dockerhub, which has all of the
-Python bindings pre-installed:
-
-@code
-docker run -it mlpack/mlpack /bin/bash
-@endcode
-
-@section python_quickstart_example Simple mlpack quickstart example
-
-As a really simple example of how to use mlpack from Python, let's do some
-simple classification on a subset of the standard machine learning @c covertype
-dataset.  We'll first split the dataset into a training set and a testing set,
-then we'll train an mlpack random forest on the training data, and finally we'll
-print the accuracy of the random forest on the test dataset.
-
-You can copy-paste this code directly into Python to run it.
-
-@code{.py}
-import mlpack
-import pandas as pd
-import numpy as np
-
-# Load the dataset from an online URL.  Replace with 'covertype.csv.gz' if you
-# want to use on the full dataset.
-df = pd.read_csv('http://www.mlpack.org/datasets/covertype-small.csv.gz')
-
-# Split the labels.
-labels = df['label']
-dataset = df.drop('label', axis=1)
-
-# Split the dataset using mlpack.  The output comes back as a dictionary,
-# which we'll unpack for clarity of code.
-output = mlpack.preprocess_split(input=dataset,
-                                 input_labels=labels,
-                                 test_ratio=0.3)
-training_set = output['training']
-training_labels = output['training_labels']
-test_set = output['test']
-test_labels = output['test_labels']
-
-# Train a random forest.
-output = mlpack.random_forest(training=training_set,
-                              labels=training_labels,
-                              print_training_accuracy=True,
-                              num_trees=10,
-                              minimum_leaf_size=3)
-random_forest = output['output_model']
-
-# Predict the labels of the test points.
-output = mlpack.random_forest(input_model=random_forest,
-                              test=test_set)
-
-# Now print the accuracy.  The 'probabilities' output could also be used
-# to generate an ROC curve.
-correct = np.sum(
-    output['predictions'] == np.reshape(test_labels, (test_labels.shape[0],)))
-print(str(correct) + ' correct out of ' + str(len(test_labels)) + ' (' +
-    str(100 * float(correct) / float(len(test_labels))) + '%).')
-@endcode
-
-We can see that we achieve reasonably good accuracy on the test dataset (80%+);
-if we use the full @c covertype.csv.gz, the accuracy should increase
-significantly (but training will take longer).
-
-It's easy to modify the code above to do more complex things, or to use
-different mlpack learners, or to interface with other machine learning toolkits.
-
-@section python_quickstart_whatelse What else does mlpack implement?
-
-The example above has only shown a little bit of the functionality of mlpack.
-Lots of other commands are available with different functionality.  A full list
-of each of these commands and full documentation can be found on the following
-page:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/python_documentation.html">Python documentation</a>
-
-For more information on what mlpack does, see https://www.mlpack.org/.
-Next, let's go through another example of providing movie recommendations with
-mlpack.
-
-@section python_quickstart_movierecs Using mlpack for movie recommendations
-
-In this example, we'll train a collaborative filtering model using mlpack's
-<tt><a href="https://www.mlpack.org/doc/mlpack-git/python_documentation.html#cf">cf()</a></tt> method.  We'll train this on the MovieLens dataset from
-https://grouplens.org/datasets/movielens/, and then we'll use the model that we
-train to give recommendations.
-
-You can copy-paste this code directly into Python to run it.
-
-@code{.py}
-import mlpack
-import pandas as pd
-import numpy as np
-
-# First, load the MovieLens dataset.  This is taken from files.grouplens.org/
-# but reposted on mlpack.org as unpacked and slightly preprocessed data.
-ratings = pd.read_csv('http://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz')
-movies = pd.read_csv('http://www.mlpack.org/datasets/ml-20m/movies.csv.gz')
-
-# Hold out 10% of the dataset into a test set so we can evaluate performance.
-output = mlpack.preprocess_split(input=ratings, test_ratio=0.1, verbose=True)
-ratings_train = output['training']
-ratings_test = output['test']
-
-# Train the model.  Change the rank to increase/decrease the complexity of the
-# model.
-output = mlpack.cf(training=ratings_train,
-                   test=ratings_test,
-                   rank=10,
-                   verbose=True,
-                   algorithm='RegSVD')
-cf_model = output['output_model']
-
-# Now query the top 10 movies for user 1.
-output = mlpack.cf(input_model=cf_model,
-                   query=[[1]],
-                   recommendations=10,
-                   verbose=True)
-
-# Get the names of the movies for user 1.
-print("Recommendations for user 1:")
-for i in range(10):
-  print("  " + str(i) + ": " + str(movies.loc[movies['movieId'] ==
-      output['output'][0, i]].iloc[0]['title']))
-@endcode
-
-Here is some example output, showing that user 1 seems to have good taste in
-movies:
-
-@code{.unparsed}
-Recommendations for user 1:
-  0: Casablanca (1942)
-  1: Pan's Labyrinth (Laberinto del fauno, El) (2006)
-  2: Godfather, The (1972)
-  3: Answer This! (2010)
-  4: Life Is Beautiful (La Vita è bella) (1997)
-  5: Adventures of Tintin, The (2011)
-  6: Dark Knight, The (2008)
-  7: Out for Justice (1991)
-  8: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
-  9: Schindler's List (1993)
-@endcode
-
-@section python_quickstart_nextsteps Next steps with mlpack
-
-Now that you have done some simple work with mlpack, you have seen how it can
-easily plug into a data science workflow in Python.  A great thing to do next
-would be to look at more documentation for the Python mlpack bindings:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/python_documentation.html">Python mlpack
-   binding documentation</a>
-
-Also, mlpack is much more flexible when used directly from C++, and allows much
-greater functionality.  So, more complicated tasks are possible if you are
-willing to write C++ (or perhaps Cython).  To get started learning about mlpack
-in C++, the following resources might be helpful:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/tutorials.html">mlpack
-   C++ tutorials</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/build.html">mlpack
-   build and installation guide</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/sample.html">Simple
-   sample C++ mlpack programs</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/index.html">mlpack
-   Doxygen documentation homepage</a>
-
- */
diff -pruN 3.4.2-7/doc/guide/r_quickstart.hpp 4.0.1-1/doc/guide/r_quickstart.hpp
--- 3.4.2-7/doc/guide/r_quickstart.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/r_quickstart.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,193 +0,0 @@
-/**
- * @file r_quickstart.hpp
- * @author Yashwant Singh Parihar
-
-@page r_quickstart mlpack in R quickstart guide
-
-@section r_quickstart_intro Introduction
-
-This page describes how you can quickly get started using mlpack from R,
-gives a few examples of usage, and points to deeper documentation.
-
-This quickstart guide is also available for @ref python_quickstart "Python",
-@ref cli_quickstart "the command-line", @ref julia_quickstart "Julia" and
-@ref go_quickstart "Go".
-
-@section r_quickstart_install Installing mlpack binary package
-
-Installing the mlpack bindings for R is straightforward; you can just use
-CRAN:
-
-@code{.R}
-install.packages('mlpack')
-@endcode
-
-@section r_quickstart_source_install Installing mlpack package from source
-
-Building the R bindings from scratch is a little more in-depth, though.  For
-information on that, follow the instructions on the @ref build page, and be sure
-to specify @c -DBUILD_R_BINDINGS=ON to CMake; you may need to also set the
-location of the R program with @c -DR_EXECUTABLE=/path/to/R.
-
-@section r_quickstart_example Simple mlpack quickstart example
-
-As a really simple example of how to use mlpack from R, let's do some
-simple classification on a subset of the standard machine learning @c covertype
-dataset.  We'll first split the dataset into a training set and a testing set,
-then we'll train an mlpack random forest on the training data, and finally we'll
-print the accuracy of the random forest on the test dataset.
-
-You can copy-paste this code directly into R to run it.
-
-@code{.R}
-if(!requireNamespace("data.table", quietly = TRUE)) { install.packages("data.table") }
-suppressMessages({
-    library("mlpack")
-    library("data.table")
-})
-
-# Load the dataset from an online URL.  Replace with 'covertype.csv.gz' if you
-# want to use the full dataset.
-df <- fread("https://www.mlpack.org/datasets/covertype-small.csv.gz")
-
-# Split the labels.
-labels <- df[, .(label)]
-dataset <- df[, label:=NULL]
-
-# Split the dataset using mlpack.
-prepdata <- preprocess_split(input = dataset,
-                             input_labels = labels,
-                             test_ratio = 0.3,
-                             verbose = TRUE)
-
-# Train a random forest.
-output <- random_forest(training = prepdata$training,
-                        labels = prepdata$training_labels,
-                        print_training_accuracy = TRUE,
-                        num_trees = 10,
-                        minimum_leaf_size = 3,
-                        verbose = TRUE)
-rf_model <- output$output_model
-
-# Predict the labels of the test points.
-output <- random_forest(input_model = rf_model,
-                        test = prepdata$test,
-                        verbose = TRUE)
-
-# Now print the accuracy.  The third return value ('probabilities'), which we
-# ignored here, could also be used to generate an ROC curve.
-correct <- sum(output$predictions == prepdata$test_labels)
-cat(correct, "out of", length(prepdata$test_labels), "test points correct",
-    correct / length(prepdata$test_labels) * 100.0, "%\n")
-@endcode
-
-We can see that we achieve reasonably good accuracy on the test dataset (80%+);
-if we use the full @c covertype.csv.gz, the accuracy should increase
-significantly (but training will take longer).
-
-It's easy to modify the code above to do more complex things, or to use
-different mlpack learners, or to interface with other machine learning toolkits.
-
-@section r_quickstart_whatelse What else does mlpack implement?
-
-The example above has only shown a little bit of the functionality of mlpack.
-Lots of other commands are available with different functionality.  A full list
-of each of these commands and full documentation can be found on the following
-page:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/r_documentation.html">R documentation</a>
-
-For more information on what mlpack does, see https://www.mlpack.org/.
-Next, let's go through another example of providing movie recommendations with
-mlpack.
-
-@section r_quickstart_movierecs Using mlpack for movie recommendations
-
-In this example, we'll train a collaborative filtering model using mlpack's
-<tt><a href="https://www.mlpack.org/doc/mlpack-git/r_documentation.html#cf">cf()</a></tt> method.  We'll train this on the MovieLens dataset from
-https://grouplens.org/datasets/movielens/, and then we'll use the model that we
-train to give recommendations.
-
-You can copy-paste this code directly into R to run it.
-
-@code{.R}
-if(!requireNamespace("data.table", quietly = TRUE)) { install.packages("data.table") }
-suppressMessages({
-    library("mlpack")
-    library("data.table")
-})
-
-# First, load the MovieLens dataset.  This is taken from files.grouplens.org/
-# but reposted on mlpack.org as unpacked and slightly preprocessed data.
-ratings <- fread("http://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz")
-movies <- fread("http://www.mlpack.org/datasets/ml-20m/movies.csv.gz")
-
-# Hold out 10% of the dataset into a test set so we can evaluate performance.
-predata <- preprocess_split(input = ratings,
-                            test_ratio = 0.1,
-                            verbose = TRUE)
-
-# Train the model.  Change the rank to increase/decrease the complexity of the
-# model.
-output <- cf(training = predata$training,
-             test = predata$test,
-             rank = 10,
-             verbose = TRUE,
-             max_iterations = 2,
-             algorithm = "RegSVD")
-cf_model <- output$output_model
-
-# Now query the top 10 movies for user 1.
-output <- cf(input_model = cf_model,
-             query = matrix(1),
-             recommendations = 10,
-             verbose = TRUE)
-
-# Get the names of the movies for user 1.
-cat("Recommendations for user 1:\n")
-for (i in 1:10) {
-  cat("  ", i, ":", as.character(movies[output$output[i], 3]), "\n")
-}
-@endcode
-
-Here is some example output, showing that user 1 seems to have good taste in
-movies:
-
-@code{.unparsed}
-Recommendations for user 1:
-  1: Casablanca (1942)
-  2: Pan's Labyrinth (Laberinto del fauno, El) (2006)
-  3: Godfather, The (1972)
-  4: Answer This! (2010)
-  5: Life Is Beautiful (La Vita è bella) (1997)
-  6: Adventures of Tintin, The (2011)
-  7: Dark Knight, The (2008)
-  8: Out for Justice (1991)
-  9: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
-  10: Schindler's List (1993)
-@endcode
-
-@section r_quickstart_nextsteps Next steps with mlpack
-
-After working through this overview of `mlpack`'s R package, we hope you are
-inspired to use `mlpack` in your data science workflow.  As a next step, we
-recommend looking at more documentation for the R mlpack bindings:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/r_documentation.html">R mlpack
-   binding documentation</a>
-
-Also, mlpack is much more flexible when used directly from C++, and allows much
-greater functionality.  So, more complicated tasks are possible if you are
-willing to write C++ (or perhaps Rcpp).  To get started learning about mlpack
-in C++, the following resources might be helpful:
-
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/tutorials.html">mlpack
-   C++ tutorials</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/build.html">mlpack
-   build and installation guide</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/sample.html">Simple
-   sample C++ mlpack programs</a>
- - <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/index.html">mlpack
-   Doxygen documentation homepage</a>
-
- */
diff -pruN 3.4.2-7/doc/guide/sample.hpp 4.0.1-1/doc/guide/sample.hpp
--- 3.4.2-7/doc/guide/sample.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/sample.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,106 +0,0 @@
-/*! @page sample Simple Sample mlpack Programs
-
-@section sampleintro Introduction
-
-On this page, several simple mlpack examples are contained, in increasing order
-of complexity.  If you compile from the command-line, be sure that your compiler
-is in C++11 mode.  With modern gcc and clang, this should already be the
-default.
-
-@note
-The command-line programs like @c knn_main.cpp and @c
-logistic_regression_main.cpp from the directory @c src/mlpack/methods/ cannot be
-compiled easily by hand (the same is true for the individual tests in @c
-src/mlpack/tests/); instead, those should be compiled with CMake, by running,
-e.g., @c make @c mlpack_knn or @c make @c mlpack_test; see @ref build.  However,
-any program that uses mlpack (and is not a part of the library itself) can be
-compiled easily with g++ or clang from the command line.
-
-@section covariance Covariance Computation
-
-A simple program to compute the covariance of a data matrix ("data.csv"),
-assuming that the data is already centered, and save it to file.
-
-@code
-// Includes all relevant components of mlpack.
-#include <mlpack/core.hpp>
-
-// Convenience.
-using namespace mlpack;
-
-int main()
-{
-  // First, load the data.
-  arma::mat data;
-  // Use data::Load() which transposes the matrix.
-  data::Load("data.csv", data, true);
-
-  // Now compute the covariance.  We assume that the data is already centered.
-  // Remember, because the matrix is column-major, the covariance operation is
-  // transposed.
-  arma::mat cov = data * trans(data) / data.n_cols;
-
-  // Save the output.
-  data::Save("cov.csv", cov, true);
-}
-@endcode
-
-@section nn Nearest Neighbor
-
-This simple program uses the mlpack::neighbor::NeighborSearch object to find the
-nearest neighbor of each point in a dataset using the L1 metric, and then prints
-each neighbor's index and distance to stdout.
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace mlpack;
-using namespace mlpack::neighbor; // NeighborSearch and NearestNeighborSort
-using namespace mlpack::metric; // ManhattanDistance
-
-int main()
-{
-  // Load the data from data.csv (the filename is hard-coded here).
-  arma::mat data;
-  data::Load("data.csv", data, true);
-
-  // Use templates to specify that we want a NeighborSearch object which uses
-  // the Manhattan distance.
-  NeighborSearch<NearestNeighborSort, ManhattanDistance> nn(data);
-
-  // Create the object we will store the nearest neighbors in.
-  arma::Mat<size_t> neighbors;
-  arma::mat distances; // We need to store the distance too.
-
-  // Compute the neighbors.
-  nn.Search(1, neighbors, distances);
-
-  // Write each neighbor and distance using Log.
-  for (size_t i = 0; i < neighbors.n_elem; ++i)
-  {
-    std::cout << "Nearest neighbor of point " << i << " is point "
-        << neighbors[i] << " and the distance is " << distances[i] << ".\n";
-  }
-}
-@endcode
-
-@section other Other examples
-
-For more complex examples, it is useful to refer to the main executables, found
-in @c src/mlpack/methods/.  A few are listed below.
-
- - methods/neighbor_search/knn_main.cpp
- - methods/neighbor_search/kfn_main.cpp
- - methods/emst/emst_main.cpp
- - methods/radical/radical_main.cpp
- - methods/nca/nca_main.cpp
- - methods/naive_bayes/nbc_main.cpp
- - methods/pca/pca_main.cpp
- - methods/lars/lars_main.cpp
- - methods/linear_regression/linear_regression_main.cpp
- - methods/gmm/gmm_main.cpp
- - methods/kmeans/kmeans_main.cpp
-
-*/
diff -pruN 3.4.2-7/doc/guide/sample_ml_app.hpp 4.0.1-1/doc/guide/sample_ml_app.hpp
--- 3.4.2-7/doc/guide/sample_ml_app.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/sample_ml_app.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,210 +0,0 @@
-/**
- * @file sample_ml_app.hpp
- * @author German Lancioni
-
-@page sample_ml_app Sample C++ ML App for Windows
-
-@section sample_intro Introduction
-
-This tutorial will help you create a sample machine learning app using mlpack/C++. Although this app
-does not cover all of mlpack's capabilities, it will walk through several APIs to show how
-everything connects. This Windows sample app is created using Visual Studio, but you can easily
-adapt it to a different platform by following the provided source code.
-
-@note Before starting, make sure you have built mlpack for Windows following this @ref build_windows "Windows guide"
-
-@section sample_create_project Creating the VS project
-
-- Open Visual Studio and create a new project (Windows Console Application)
-- For this sample, the project is named “sample-ml-app”
-
-@section sample_project_config Project Configuration
-
-There are different ways in which you can configure your project to link with dependencies. This configuration
-is for x64 Debug Mode. If you need Release Mode, please change the paths accordingly (assuming you have built
-mlpack and dependencies in Release Mode).
-
-- Right click on the project and select Properties, select the x64 Debug profile
-- Under C/C++ > General > Additional Include Directories add:
-@code
- - C:\boost\boost_1_71_0\lib\native\include
- - C:\mlpack\armadillo-9.800.3\include
- - C:\mlpack\mlpack-3.4.2\build\include
-@endcode
-- Under Linker > Input > Additional Dependencies add:
-@code
- - C:\mlpack\mlpack-3.4.2\build\Debug\mlpack.lib
- - C:\boost\boost_1_71_0\lib64-msvc-14.2\libboost_serialization-vc142-mt-gd-x64-1_71.lib
-@endcode
-- Under Build Events > Post-Build Event > Command Line add:
-@code
- - xcopy /y "C:\mlpack\mlpack-3.4.2\build\Debug\mlpack.dll" $(OutDir)
- - xcopy /y "C:\mlpack\mlpack-3.4.2\packages\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll" $(OutDir)
-@endcode
-
-@note Recent versions of Visual Studio set "Conformance Mode" enabled by default. This causes some issues with
-the armadillo library. If you encounter this issue, disable "Conformance Mode" under C/C++ > Language.
-
-@section sample_app_goal The app goal
-
-This app aims to exercise an end-to-end machine learning workflow. We will cover:
-
-- Loading and preparing a dataset
-- Training (using Random Forest as example)
-- Computing the training accuracy
-- Cross-Validation using K-Fold
-- Metrics gathering (accuracy, precision, recall, F1)
-- Saving the trained model to disk
-- Loading the model
-- Classifying a new sample
-
-@section sample_headers_namespaces Headers and namespaces
-
-For this app, we will need to include the following headers (i.e. add into stdafx.h):
-
-@code
-#include "mlpack/core.hpp"
-#include "mlpack/methods/random_forest/random_forest.hpp"
-#include "mlpack/methods/decision_tree/random_dimension_select.hpp"
-#include "mlpack/core/cv/k_fold_cv.hpp"
-#include "mlpack/core/cv/metrics/accuracy.hpp"
-#include "mlpack/core/cv/metrics/precision.hpp"
-#include "mlpack/core/cv/metrics/recall.hpp"
-#include "mlpack/core/cv/metrics/F1.hpp"
-@endcode
-
-Also, we will use the following namespaces:
-
-@code
-using namespace arma;
-using namespace mlpack;
-using namespace mlpack::tree;
-using namespace mlpack::cv;
-@endcode
-
-@section sample_load_dataset Loading the dataset
-
-The first step is to load the dataset. Different dataset file formats are supported, but here
-we load a CSV dataset, and we assume the labels don't require normalization.
-
-@note Make sure you update the path to your dataset file. For this sample, you can simply
-copy "mlpack/tests/data/german.csv" and paste into a new "data" folder in your project directory.
-
-@code
-mat dataset;
-bool loaded = mlpack::data::Load("data/german.csv", dataset);
-if (!loaded)
-  return -1;
-@endcode
-
-Then we need to extract the labels from the last dimension of the dataset and remove the
-labels from the training set:
-
-@code
-Row<size_t> labels;
-labels = conv_to<Row<size_t>>::from(dataset.row(dataset.n_rows - 1));
-dataset.shed_row(dataset.n_rows - 1);
-@endcode
-
-We now have our dataset ready for training.
-
-@section sample_training Training
-
-This app will use a Random Forest classifier. First we define the classifier parameters, and then
-we create and train the classifier.
-
-@code
-const size_t numClasses = 2;
-const size_t minimumLeafSize = 5;
-const size_t numTrees = 10;
-
-RandomForest<GiniGain, RandomDimensionSelect> rf;
-
-rf = RandomForest<GiniGain, RandomDimensionSelect>(dataset, labels,
-    numClasses, numTrees, minimumLeafSize);
-@endcode
-
-Now that the training is completed, we quickly compute the training accuracy:
-
-@code
-Row<size_t> predictions;
-rf.Classify(dataset, predictions);
-const size_t correct = arma::accu(predictions == labels);
-cout << "\nTraining Accuracy: " << (double(correct) / double(labels.n_elem));
-@endcode
-
-@section sample_crossvalidation Cross-Validating
-
-Instead of training the Random Forest directly, we could also use K-fold cross-validation for training,
-which will give us a measure of performance on a held-out test set. This can give us a better estimate 
-of how the model will perform when given new data. We also define which metric to use in order
-to assess the quality of the trained model.
-
-@code
-const size_t k = 10;
-KFoldCV<RandomForest<GiniGain, RandomDimensionSelect>, Accuracy> cv(k, 
-    dataset, labels, numClasses);
-double cvAcc = cv.Evaluate(numTrees, minimumLeafSize);
-cout << "\nKFoldCV Accuracy: " << cvAcc;
-@endcode
-
-To compute other relevant metrics, such as Precision, Recall and F1:
-
-@code
-double cvPrecision = Precision<Binary>::Evaluate(rf, dataset, labels);
-cout << "\nPrecision: " << cvPrecision;
-
-double cvRecall = Recall<Binary>::Evaluate(rf, dataset, labels);
-cout << "\nRecall: " << cvRecall;
-
-double cvF1 = F1<Binary>::Evaluate(rf, dataset, labels);
-cout << "\nF1: " << cvF1;
-@endcode
-
-@section sample_save_model Saving the model
-
-Now that our model is trained and validated, we save it to a file so we can use it later. Here we save the
-model that was trained using the entire dataset. Alternatively, we could extract the model from the cross-validation
-stage by using \c cv.Model().
-
-@code
-mlpack::data::Save("mymodel.xml", "model", rf, false);
-@endcode
-
-We can also save the model in \c bin format ("mymodel.bin") which would result in a smaller file.
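-
-For instance, a minimal sketch of the same call with the binary extension
-(data::Save() picks the serialization format from the file extension):
-
-@code
-mlpack::data::Save("mymodel.bin", "model", rf, false);
-@endcode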
-
-@section sample_load_model Loading the model
-
-In a real-life application, you may want to load a previously trained model to classify new samples.
-We load the model from a file using:
-
-@code
-mlpack::data::Load("mymodel.xml", "model", rf);
-@endcode
-
-@section sample_classify_sample Classifying a new sample
-
-Finally, the ultimate goal is to classify a new sample using the previously trained model. Since the
-Random Forest classifier provides both predictions and probabilities, we obtain both.
-
-@code
-// Create a test sample containing only one point.  Because Armadillo is
-// column-major, this matrix has one column (one point) and the number of rows
-// is equal to the dimensionality of the point (23).
-mat sample("2; 12; 2; 13; 1; 2; 2; 1; 3; 24; 3; 1; 1; 1; 1; 1; 0; 1; 0; 1;"
-    " 0; 0; 0");
-mat probabilities;
-rf.Classify(sample, predictions, probabilities);
-u64 result = predictions.at(0);
-cout << "\nClassification result: " << result << " , Probabilities: " <<
-    probabilities.at(0) << "/" << probabilities.at(1);
-@endcode
-
-@section sample_app_conclusion Final thoughts
-
-Building real-life applications and services using machine learning can be challenging. Hopefully, this
-tutorial provides a good starting point that covers the basic workflow you may need to follow while
-developing your own application. You can take a look at the entire source code in the provided sample project located here:
-"doc/examples/sample-ml-app".
-
-*/
diff -pruN 3.4.2-7/doc/guide/timer.hpp 4.0.1-1/doc/guide/timer.hpp
--- 3.4.2-7/doc/guide/timer.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/timer.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,66 +0,0 @@
-/*! @page timer mlpack Timers
-
-@section timerintro Introduction
-
-mlpack provides a simple timer interface for the timing of machine learning
-methods.  The results of any timers used during the program are printed at the
-end of the output of any command-line binding when --verbose is given:
-
-@code
-$ mlpack_knn -r dataset.csv -n neighbors_out.csv -d distances_out.csv -k 5 -v
-<...>
-[INFO ] Program timers:
-[INFO ]   computing_neighbors: 0.010650s
-[INFO ]   loading_data: 0.002567s
-[INFO ]   saving_data: 0.001115s
-[INFO ]   total_time: 0.149816s
-[INFO ]   tree_building: 0.000534s
-@endcode
-
-@section usingtimer Timer API
-
-The mlpack::Timer class provides three simple methods:
-
-@code
-void Timer::Start(const char* name);
-void Timer::Stop(const char* name);
-timeval Timer::Get(const char* name);
-@endcode
-
-Each timer is given a name, and is referenced by that name.  You can call \c
-Timer::Start() and \c Timer::Stop() multiple times for a particular timer name,
-and the result will be the sum of the runs of the timer.  Note that \c
-Timer::Stop() must be called before \c Timer::Start() is called again,
-otherwise a std::runtime_error exception will be thrown.
-
-A \c "total_time" timer is run by default for each mlpack program.
-
-@section example Timer Example
-
-Below is a very simple example of timer usage in code.
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/core/util/io.hpp>
-#define BINDING_TYPE BINDING_TYPE_CLI
-#include <mlpack/core/util/mlpack_main.hpp>
-
-using namespace mlpack;
-
-void mlpackMain()
-{
-  // Start a timer.
-  Timer::Start("some_timer");
-
-  // Do some things.
-  DoSomeStuff();
-
-  // Stop the timer.
-  Timer::Stop("some_timer");
-}
-@endcode
-
-If the --verbose flag was given to this executable, the time that
-\c "some_timer" ran for would be printed at the end of the program's output.
-
-*/
diff -pruN 3.4.2-7/doc/guide/version.hpp 4.0.1-1/doc/guide/version.hpp
--- 3.4.2-7/doc/guide/version.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/guide/version.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,29 +0,0 @@
-/*! @page verinfo mlpack version information
-
-@section vercode mlpack versions in code
-
-mlpack provides a couple of convenience macros and functions to get the version
-of mlpack.  More information (and straightforward code) can be found in
-src/mlpack/core/util/version.hpp.
-
-The following three macros provide major, minor, and patch versions of mlpack
-(i.e. for mlpack-x.y.z, 'x' is the major version, 'y' is the minor version, and
-'z' is the patch version):
-
-@code
-MLPACK_VERSION_MAJOR
-MLPACK_VERSION_MINOR
-MLPACK_VERSION_PATCH
-@endcode
-
-In addition, the function \c mlpack::util::GetVersion() returns the mlpack
-version as a string (for instance, "mlpack 1.0.8").
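-
-For example, a minimal sketch that prints the version both ways:
-
-@code
-#include <iostream>
-#include <mlpack/core/util/version.hpp>
-
-int main()
-{
-  // Print the compile-time version macros.
-  std::cout << "Compiled against mlpack " << MLPACK_VERSION_MAJOR << "."
-      << MLPACK_VERSION_MINOR << "." << MLPACK_VERSION_PATCH << "."
-      << std::endl;
-
-  // Print the version string.
-  std::cout << mlpack::util::GetVersion() << std::endl;
-}
-@endcode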
-
-@section verex mlpack executable versions
-
-Each mlpack executable supports the \c --version (or \c -V ) option, which will
-print the version of mlpack used.  If the version is not an official release but
-instead from svn trunk, the version will be "mlpack trunk" (and may have a
-revision number appended to "trunk").
-
-*/
diff -pruN 3.4.2-7/doc/policies/elemtype.hpp 4.0.1-1/doc/policies/elemtype.hpp
--- 3.4.2-7/doc/policies/elemtype.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/policies/elemtype.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,42 +0,0 @@
-/*! @page elem The ElemType policy in mlpack
-
-@section elem_overview Overview
-
-\b mlpack algorithms should be as generic as possible.  Often this means
-allowing arbitrary metrics or kernels to be used, but this also means allowing
-any type of data point to be used.  This means that \b mlpack classes should
-support \c float, \c double, and other observation types.  Some algorithms
-support this through the use of a \c MatType template parameter; others will
-have their own template parameter, \c ElemType.
-
-The \c ElemType template parameter can take any value that can be used by
-Armadillo (or, specifically, classes like \c arma::Mat<> and others); this
-encompasses the types
-
- - \c double
- - \c float
- - \c int
- - \c unsigned int
- - \c std::complex<double>
- - \c std::complex<float>
-
-and other primitive numeric types.  Note that Armadillo does not support some
-integer types for functionality such as matrix decompositions or other more
-advanced linear algebra.  This means that when these integer types are used,
-some algorithms may fail with Armadillo error messages indicating that those
-types cannot be used.
-
-@section elem_developers A note for developers
-
-If the class has a \c MatType template parameter, \c ElemType can be easily
-defined as below:
-
-@code
-typedef typename MatType::elem_type ElemType;
-@endcode
-
-and otherwise a template parameter with the name \c ElemType can be used.  It is
-generally a good idea to expose the element type somehow for use by other
-classes.
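-
-As a minimal sketch, a hypothetical class with a \c MatType template parameter
-might expose its element type like this:
-
-@code
-template<typename MatType = arma::mat>
-class ExampleMethod
-{
- public:
-  // Expose the element type for use by other classes.
-  typedef typename MatType::elem_type ElemType;
-
-  // Scalars that must match the matrix type can now use ElemType.
-  ElemType Tolerance() const { return tolerance; }
-
- private:
-  ElemType tolerance;
-};
-@endcode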
-
-*/
diff -pruN 3.4.2-7/doc/policies/functiontype.hpp 4.0.1-1/doc/policies/functiontype.hpp
--- 3.4.2-7/doc/policies/functiontype.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/policies/functiontype.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,114 +0,0 @@
-/*! @page function The FunctionType policy in mlpack
-
-@section function_overview Overview
-
-To represent the various types of loss functions encountered in machine
-learning problems, mlpack provides the \c FunctionType template parameter in
-the optimizer interface. The various optimizers available in the core library
-rely on this policy to obtain the information required by the optimization
-algorithm.
-
-The \c FunctionType template parameter required by the Optimizer class can have
-additional requirements imposed on it, depending on the type of optimizer used.
-
-@section requirements Interface requirements
-
-The most basic requirements for the \c FunctionType parameter are the
-implementations of two public member functions, with the following interface
-and semantics:
-
-@code
-// Evaluate the loss function at the given coordinates.
-double Evaluate(const arma::mat& coordinates);
-@endcode
-
-
-@code
-// Evaluate the gradient at the given coordinates, where 'gradient' is an
-// output parameter for the required gradient.
-void Gradient(const arma::mat& coordinates, arma::mat& gradient);
-@endcode
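-
-As a minimal sketch, a hypothetical \c FunctionType implementing the objective
-\f$ f(x) = \| A x - b \|^2 \f$ might look like this:
-
-@code
-class LeastSquaresFunction
-{
- public:
-  LeastSquaresFunction(const arma::mat& A, const arma::vec& b) : A(A), b(b) { }
-
-  // Evaluate the loss function at the given coordinates.
-  double Evaluate(const arma::mat& coordinates)
-  {
-    const arma::vec r = A * coordinates - b;
-    return arma::dot(r, r);
-  }
-
-  // The gradient of f at x is 2 A^T (A x - b).
-  void Gradient(const arma::mat& coordinates, arma::mat& gradient)
-  {
-    gradient = 2 * A.t() * (A * coordinates - b);
-  }
-
- private:
-  const arma::mat& A;
-  const arma::vec& b;
-};
-@endcode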
-
-
-Optimizers like SGD and RMSProp require a \c DecomposableFunctionType with the
-following requirements:
-
-@code
-// Return the number of functions. In a data-dependent function, this would
-// return the number of points in the dataset.
-size_t NumFunctions();
-@endcode
-
-
-@code
-// Evaluate the 'i' th loss function. For example, for a data-dependent
-// function, Evaluate(coordinates, 0) should evaluate the loss function at the
-// first point in the dataset.
-double Evaluate(const arma::mat& coordinates, const size_t i);
-@endcode
-
-@code
-// Evaluate the gradient of the 'i' th loss function at the given coordinates,
-// where 'gradient' is an output parameter for the required gradient.
-void Gradient(const arma::mat& coordinates, const size_t i, arma::mat& gradient);
-@endcode
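-
-Continuing the sketch above, a hypothetical decomposable variant treats each
-row \f$ a_i \f$ of \f$ A \f$ as one function,
-\f$ f_i(x) = (a_i^T x - b_i)^2 \f$:
-
-@code
-class DecomposableLeastSquares
-{
- public:
-  DecomposableLeastSquares(const arma::mat& A, const arma::vec& b) :
-      A(A), b(b) { }
-
-  // One function per point (row of A).
-  size_t NumFunctions() { return A.n_rows; }
-
-  // Evaluate the i'th loss function.
-  double Evaluate(const arma::mat& coordinates, const size_t i)
-  {
-    const double r = arma::as_scalar(A.row(i) * coordinates) - b(i);
-    return r * r;
-  }
-
-  // The gradient of f_i at x is 2 (a_i^T x - b_i) a_i.
-  void Gradient(const arma::mat& coordinates,
-                const size_t i,
-                arma::mat& gradient)
-  {
-    const double r = arma::as_scalar(A.row(i) * coordinates) - b(i);
-    gradient = 2 * r * A.row(i).t();
-  }
-
- private:
-  const arma::mat& A;
-  const arma::vec& b;
-};
-@endcode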
-
-
-
-The \c ParallelSGD optimizer requires a \c SparseFunctionType interface.
-\c SparseFunctionType requires the gradient to be returned in a sparse matrix
-(\c arma::sp_mat), as ParallelSGD, implemented with the HOGWILD! scheme of
-unsynchronised updates, is expected to be relevant only in situations where the
-individual gradients are sparse.  So, the interface requires functions with the
-following signatures:
-
-@code
-// Return the number of functions. In a data-dependent function, this would
-// return the number of points in the dataset.
-size_t NumFunctions();
-@endcode
-
-
-@code
-// Evaluate the loss function at the given coordinates.
-double Evaluate(const arma::mat& coordinates);
-@endcode
-
-
-@code
-// Evaluate the (sparse) gradient of the 'i' th loss function at the given
-// coordinates, where 'gradient' is an output parameter for the required
-// gradient.
-void Gradient(const arma::mat& coordinates, const size_t i, arma::sp_mat& gradient);
-@endcode
-
-
-The \c SCD optimizer requires a \c ResolvableFunctionType interface, to
-calculate partial gradients with respect to individual features. The optimizer
-requires the decision variable to be arranged in a particular fashion to allow
-for disjoint updates. The features should be arranged columnwise in the decision
-variable. For example, in \c SoftmaxRegressionFunction the decision variable has
-size \c numClasses x \c featureSize (+ 1 if an intercept also needs to be fit).
-Similarly, for \c LogisticRegression, the decision variable is a row vector,
-with the number of columns determined by the dimensionality of the dataset.
-
-The interface expects the following member functions from the function class:
-
-@code
-// Return the number of features in the decision variable.
-size_t NumFeatures();
-@endcode
-
-@code
-// Evaluate the loss function at the given coordinates.
-double Evaluate(const arma::mat& coordinates);
-@endcode
-
-@code
-// Evaluate the partial gradient of the loss function with respect to the 'j' th
-// coordinate at the given coordinates, where 'gradient' is an output parameter
-// for the required gradient.  The 'gradient' matrix should be non-zero only
-// in the j'th column, which contains the relevant partial gradient.
-void PartialGradient(const arma::mat& coordinates, const size_t j, arma::sp_mat& gradient);
-@endcode
-*/
diff -pruN 3.4.2-7/doc/policies/kernels.hpp 4.0.1-1/doc/policies/kernels.hpp
--- 3.4.2-7/doc/policies/kernels.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/policies/kernels.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,166 +0,0 @@
-/*! @page kernels The KernelType policy in mlpack
-
-@section kerneltoc Table of Contents
-
- - \ref kerneltype
- - \ref kerneltraits
- - \ref kernellist
-
-@section kerneltype Introduction to the KernelType policy
-
-`Kernel methods' make up a large class of machine learning techniques.  Each of
-these methods is characterized by its dependence on a \b kernel \b function.  In
-rough terms, a kernel function is a general notion of similarity between two
-points, with its value large when objects are similar and its value small when
-objects are dissimilar (note that this is not the only interpretation of what a
-kernel is).
-
-A kernel (or `Mercer kernel') \f$\mathcal{K}(\cdot, \cdot)\f$ takes two objects
-as input and returns some sort of similarity value.  The specific details and
-properties of kernels are outside the scope of this documentation; for a better
-introduction to kernels and kernel methods, there are numerous resources
-available, including
-<a href="http://www.eric-kim.net/eric-kim-net/posts/1/kernel_trick.html">Eric Kim's tutorial</a>.
-
-mlpack implements a number of kernel methods and, accordingly, each of these
-methods allows arbitrary kernels to be used via the \c KernelType template
-parameter.  Like the \ref metrics "MetricType policy", the requirements are
-quite simple: a class implementing the \c KernelType policy must have
-
- - an \c Evaluate() function
- - a default constructor
-
-The signature of the \c Evaluate() function is straightforward:
-
-@code
-template<typename VecTypeA, typename VecTypeB>
-double Evaluate(const VecTypeA& a, const VecTypeB& b);
-@endcode
-
-The function takes two vector arguments, \c a and \c b, and returns a \c double
-that is the evaluation of the kernel between the two arguments.  So, for a
-particular kernel \f$\mathcal{K}(\cdot, \cdot)\f$, the \c Evaluate() function
-should return \f$\mathcal{K}(a, b)\f$.
-
-The arguments \c a and \c b, of types \c VecTypeA and \c VecTypeB, respectively,
-will be an Armadillo-like vector type (usually \c arma::vec, \c arma::sp_vec, or
-similar).  In general it should be valid to assume that \c VecTypeA is a class
-with the same API as \c arma::vec.
-
-Note that for kernels that do not hold any state, the \c Evaluate() method can
-be marked as \c static.
-
-Overall, the \c KernelType template policy is quite simple (much like the
-\ref metrics "MetricType policy").  Below is an example kernel class, which
-outputs \c 1 if the vectors are close and \c 0 otherwise.
-
-@code
-class ExampleKernel
-{
- public:
-  // Default constructor is required.
-  ExampleKernel() { }
-
-  // The example kernel holds no state, so we can mark Evaluate() as static.
-  template<typename VecTypeA, typename VecTypeB>
-  static double Evaluate(const VecTypeA& a, const VecTypeB& b)
-  {
-    // Get how far apart the vectors are (using the Euclidean distance).
-    const double distance = arma::norm(a - b);
-
-    if (distance < 0.05) // Less than 0.05 distance is "close".
-      return 1;
-    else
-      return 0;
-  }
-};
-@endcode
-
-Then, this kernel may be easily used inside of mlpack algorithms.  For instance,
-the code below runs kernel PCA (\c mlpack::kpca::KernelPCA) on a random dataset
-using the \c ExampleKernel.  The results are saved to a file called
-\c results.csv.  (Note that this is simply an example to demonstrate usage, and
-this example kernel isn't actually likely to be useful in practice.)
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/methods/kernel_pca/kernel_pca.hpp>
-#include "example_kernel.hpp" // Contains the ExampleKernel class.
-
-using namespace mlpack;
-using namespace mlpack::kpca;
-using namespace arma;
-
-int main()
-{
-  // Generate the random dataset; 10 dimensions, 5000 points.
-  mat dataset = randu<mat>(10, 5000);
-
-  // Instantiate the KernelPCA object with the ExampleKernel kernel type.
-  KernelPCA<ExampleKernel> kpca;
-
-  // The dataset will be transformed using kernel PCA with the example kernel to
-  // contain only 2 dimensions.
-  kpca.Apply(dataset, 2);
-
-  // Save the results to 'results.csv'.
-  data::Save("results.csv", dataset);
-}
-@endcode
-
-@section kerneltraits The KernelTraits trait class
-
-Some algorithms that use kernels can be specialized if the kernel fulfills
-certain conditions.  An example of a condition might be that the kernel is
-shift-invariant or that the kernel is normalized.  In the case of fast
-max-kernel search (mlpack::fastmks::FastMKS), the computation can be accelerated
-if the kernel is normalized.  For this reason, the \c KernelTraits trait class
-exists.  This allows a kernel to specify via a \c const \c static \c bool when
-these types of conditions are satisfied.  **Note that a KernelTraits class
-is not required,** but may be helpful.
-
-The \c KernelTraits trait class is a template class that takes a \c KernelType
-as a parameter, and exposes \c const \c static \c bool values that depend on the
-kernel.  Setting these values is achieved by specialization.  The code below
-provides an example, specializing \c KernelTraits for the \c ExampleKernel from
-earlier:
-
-@code
-template<>
-class KernelTraits<ExampleKernel>
-{
- public:
-  //! The example kernel is normalized (K(x, x) = 1 for all x).
-  const static bool IsNormalized = true;
-};
-@endcode
-
-At this time, there is only one kernel trait that is used in mlpack code:
-
- - \c IsNormalized (defaults to \c false): if \f$ K(x, x) = 1 \; \forall x \f$,
-   then the kernel is normalized and this should be set to true.
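-
-As a minimal sketch of how generic code can use this trait, a hypothetical
-helper might skip kernel evaluations that are known to equal 1:
-
-@code
-template<typename KernelType>
-double SelfKernel(KernelType& kernel, const arma::vec& x)
-{
-  // If the kernel is normalized, K(x, x) = 1 by definition, so the
-  // evaluation can be skipped entirely.
-  if (KernelTraits<KernelType>::IsNormalized)
-    return 1.0;
-  else
-    return kernel.Evaluate(x, x);
-}
-@endcode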
-
-@section kernellist List of kernels and classes that use a \c KernelType
-
-mlpack comes with a number of pre-written kernels that satisfy the \c KernelType
-policy:
-
- - mlpack::kernel::LinearKernel
- - mlpack::kernel::ExampleKernel -- an example kernel with more documentation
- - mlpack::kernel::GaussianKernel
- - mlpack::kernel::HyperbolicTangentKernel
- - mlpack::kernel::EpanechnikovKernel
- - mlpack::kernel::CosineDistance
- - mlpack::kernel::LaplacianKernel
- - mlpack::kernel::PolynomialKernel
- - mlpack::kernel::TriangularKernel
- - mlpack::kernel::SphericalKernel
- - mlpack::kernel::PSpectrumStringKernel -- operates on strings, not vectors
-
-These kernels (or a custom kernel) may be used in a variety of mlpack methods:
-
- - mlpack::kpca::KernelPCA - kernel principal components analysis
- - mlpack::fastmks::FastMKS - fast max-kernel search
- - mlpack::kernel::NystroemMethod - the Nystroem method for sampling
- - mlpack::metric::IPMetric - a metric built on a kernel
-
-*/
diff -pruN 3.4.2-7/doc/policies/metrics.hpp 4.0.1-1/doc/policies/metrics.hpp
--- 3.4.2-7/doc/policies/metrics.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/policies/metrics.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,113 +0,0 @@
-/*! @page metrics The MetricType policy in mlpack
-
-Many machine learning methods operate with some sort of metric, and often, this
-metric can be any arbitrary metric.  For instance, consider the problem of
-nearest neighbor search; one can find the nearest neighbor of a point with
-respect to the standard Euclidean distance, or the Manhattan (city-block)
-distance.  The actual search techniques, though, remain the same.  And this is
-true of many machine learning methods: the specific metric that is used can be
-any valid metric.
-
-mlpack algorithms, when possible, allow the use of an arbitrary metric via the
-use of the \c MetricType template parameter.  Any metric passed as a
-\c MetricType template parameter will need to have
-
- - an \c Evaluate function
- - a default constructor.
-
-The signature of the \c Evaluate function is straightforward:
-
-@code
-template<typename VecTypeA, typename VecTypeB>
-double Evaluate(const VecTypeA& a, const VecTypeB& b);
-@endcode
-
-The function takes two vector arguments, \c a and \c b, and returns a \c double
-that is the evaluation of the metric between the two arguments.  So, for a
-particular metric \f$d(\cdot, \cdot)\f$, the \c Evaluate() function should
-return \f$d(a, b)\f$.
-
-The arguments \c a and \c b, of types \c VecTypeA and \c VecTypeB, respectively,
-will be an Armadillo-like vector type (usually \c arma::vec, \c arma::sp_vec, or
-similar).  In general it should be valid to assume that \c VecTypeA is a class
-with the same API as \c arma::vec.
-
-Note that for metrics that do not hold any state, the \c Evaluate() method can
-be marked as \c static.
-
-Overall, the \c MetricType template policy is quite simple (much like the
-\ref kernels "KernelType policy").  Below is an example metric class, which
-implements the L2 distance:
-
-@code
-class ExampleMetric
-{
- public:
-  // Default constructor is required.
-  ExampleMetric() { }
-
-  // The example metric holds no state, so we can mark Evaluate() as static.
-  template<typename VecTypeA, typename VecTypeB>
-  static double Evaluate(const VecTypeA& a, const VecTypeB& b)
-  {
-    // Return the L2 norm of the difference between the points, which is the
-    // same as the L2 distance.
-    return arma::norm(a - b);
-  }
-};
-@endcode
-
-Then, this metric can easily be used inside of other mlpack algorithms.  For
-example, the code below runs range search on a random dataset with the
-\c ExampleMetric, by instantiating a \c mlpack::range::RangeSearch object that
-uses the \c ExampleMetric.  Then, the number of results is printed.  The \c
-RangeSearch class takes three template parameters: \c MetricType, \c MatType,
-and \c TreeType.  (All three have defaults, so we will just leave \c MatType and
-\c TreeType to their defaults.)
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/methods/range_search/range_search.hpp>
-#include "example_metric.hpp" // A file that contains ExampleKernel.
-
-using namespace mlpack;
-using namespace mlpack::range;
-using namespace std;
-
-int main()
-{
-  // Create a random dataset with 10 dimensions and 5000 points.
-  arma::mat data = arma::randu<arma::mat>(10, 5000);
-
-  // Instantiate the RangeSearch object with the ExampleMetric.
-  RangeSearch<ExampleMetric> rs(data);
-
-  // These vectors will store the results.
-  vector<vector<size_t>> neighbors;
-  vector<vector<double>> distances;
-
-  // Create a random 10-dimensional query point.
-  arma::vec query = arma::randu<arma::vec>(10);
-
-  // Find those points with distance (according to ExampleMetric) between 1 and
-  // 2 from the query point.
-  rs.Search(query, math::Range(1.0, 2.0), neighbors, distances);
-
-  // Now, print the number of points inside the desired range.  We know that
-  // neighbors and distances will have length 1, since there was only one query
-  // point.
-  cout << neighbors[0].size() << " points within the range [1.0, 2.0] of the "
-      << "query point!" << endl;
-}
-@endcode
-
-mlpack comes with a number of pre-written metrics that satisfy the \c MetricType
-policy:
-
- - mlpack::metric::ManhattanDistance
- - mlpack::metric::EuclideanDistance
- - mlpack::metric::ChebyshevDistance
- - mlpack::metric::MahalanobisDistance
- - mlpack::metric::LMetric (for arbitrary L-metrics)
- - mlpack::metric::IPMetric (requires a \ref kernels "KernelType" parameter)
-
-*/
diff -pruN 3.4.2-7/doc/policies/trees.hpp 4.0.1-1/doc/policies/trees.hpp
--- 3.4.2-7/doc/policies/trees.hpp	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/policies/trees.hpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,906 +0,0 @@
-/*! @page trees The TreeType policy in mlpack
-
-@section treeintro Introduction
-
-Trees are an important data structure in mlpack and are used in a number of the
-machine learning algorithms that mlpack implements.  Often, the use of trees can
-allow significant acceleration of an algorithm; this is generally done by
-pruning away large parts of the tree during computation.
-
-Most mlpack algorithms that use trees are not tied to a specific tree but
-instead allow the user to choose a tree via the \c TreeType template parameter.
-Any tree passed as a \c TreeType template parameter will need to implement a
-certain set of functions.  In addition, a tree may optionally specify some
-traits about itself with the \c TreeTraits trait class.
-
-This document aims to clarify the abstractions underlying mlpack trees, list and
-describe the required functionality of the \c TreeType policy, and point users
-towards existing types of trees.  A table of contents is below:
-
- - \ref treeintro
- - \ref whatistree
- - \ref treetype_template_params
- - \ref treetype_api
- - \ref treetype_rigorous
-   - \ref treetype_rigorous_template
-   - \ref treetype_rigorous_constructor
-   - \ref treetype_rigorous_basic
-   - \ref treetype_rigorous_complex
-   - \ref treetype_rigorous_serialization
- - \ref treetype_traits
- - \ref treetype_more
-
-Although this document is long, there may still be errors and unclear areas.  If
-you are having trouble understanding anything, please get in touch on GitHub or
-on the mailing list and someone will help you (and possibly update the
-documentation afterwards).
-
-@section whatistree What is a tree?
-
-In mlpack, we assume that we have some sort of data matrix, which might be
-sparse or dense (that is, it could be of type \c arma::mat or \c arma::sp_mat,
-or any variant that implements the Armadillo API).  This data matrix corresponds
-to a collection of points in some space (usually a Euclidean space).  A tree is
-a way of organizing this data matrix in a hierarchical manner---so, points that
-are nearby should lie in similar nodes.
-
-We can rigorously define what a tree is, using the definition of **space tree**
-introduced in the following paper:
-
-@code
-R.R. Curtin, W.B. March, P. Ram, D.V. Anderson, A.G. Gray, and C.L. Isbell Jr.,
-"Tree-independent dual-tree algorithms," in Proceedings of the 30th
-International Conference on Machine Learning (ICML '13), pp. 1435--1443, 2013.
-@endcode
-
-The definition is:
-
-A **space tree** on a dataset \f$ S \in \mathcal{R}^{N \times d} \f$ is an
-undirected, connected, acyclic, rooted simple graph with the following
-properties:
-
- - Each node (or vertex) holds a number of points (possibly zero) and is
-connected to one parent node and a number of child nodes (possibly zero).
-
- - There is one node in every space tree with no parent; this is the root node
-of the tree.
-
- - Each point in \f$S\f$ is contained in at least one node.
-
- - Each node corresponds to some subset of \f$\mathcal{R}^d\f$ that contains
-each point in the node and also the subsets that correspond to each child of the
-node.
-
-This is really quite a straightforward definition: a tree is hierarchical, and
-each node corresponds to some region of the input space.  Each node may have
-some number of children, and may hold some number of points.  However, there is
-an important terminology distinction to make: the term **points held by a node**
-has a different meaning than the term **descendant points held by a node**.  The
-points held in a node are just that---points held only in the node.  The
-descendant points of a node are the combination of the points held in a node
-with the points held in the node's children and the points held in the node's
-children's children (and so forth).  For the purposes of clarity in all
-discussions about trees, care is taken to differentiate the terms "descendant
-point" and "point".
-
-Now, it's also important to note that a node does not *need* to hold any
-points, and that a node *can* hold the same points as its children (or its
-parent).  Some types of trees do this.  For instance, each node in the cover
-tree holds only one point, and may have a child that holds the same point.  As
-another example, the \f$kd\f$-tree holds its points only in the leaves (at the
-bottom of the tree).  More information on space trees can be found in either the
-"Tree-independent dual-tree algorithms" paper or any of the related literature.
-
-So there is a huge amount of possible variety in the types of trees that can
-fall into the class of *space trees*.  Therefore, it's important to treat them
-abstractly, and the \c TreeType policy allows us to do just that.  All we need
-to remember is that a node in a tree can be represented as the combination of
-some points held in the node, some child nodes, and some geometric structure
-that represents the space that all of the descendant points fall into (this is a
-restatement of the fourth part of the definition).
-
-@section treetype_template_params Template parameters required by the TreeType policy
-
-Nearly everything in mlpack is decomposed into a series of configurable template
-parameters, and trees are no exception.  In order to ease usage of high-level
-mlpack algorithms, each \c TreeType itself must be a template class taking three
-parameters:
-
- - \c MetricType -- the underlying metric that the tree will be built on (see
-\ref metrics "the MetricType policy documentation")
- - \c StatisticType -- holds any auxiliary information that individual
-algorithms may need
- - \c MatType -- the type of the matrix used to represent the data
-
-The reason that these three template parameters are necessary is so that each
-\c TreeType can be used as a template template parameter, which can radically
-simplify the required syntax for instantiating mlpack algorithms.  By using
-template template parameters, a user needs only to write
-
-@code
-// The RangeSearch class takes a MetricType and a TreeType template parameter.
-
-// This code instantiates RangeSearch with the ManhattanDistance and a
-// QuadTree.  Note that the QuadTree itself is a template, and takes a
-// MetricType, StatisticType, and MatType, just like the policy requires.
-
-// This example ignores the constructor parameters, for the sake of simplicity.
-RangeSearch<ManhattanDistance, QuadTree> rs(...);
-@endcode
-
-as opposed to the far more complicated alternative, where the user must specify
-the values of each template parameter of the tree type:
-
-@code
-// This is a much worse alternative, where the user must specify the template
-// arguments of their tree.
-RangeSearch<ManhattanDistance,
-            QuadTree<ManhattanDistance, EmptyStatistic, arma::mat>> rs(...);
-@endcode
-
-Unfortunately, the price to pay for this user convenience is that *every*
-\c TreeType must have three template parameters, and they must be in exactly
-that order.  Fortunately, there is an additional benefit: we are guaranteed that
-the tree is built using the same metric as the method (that is, a user can't
-specify different metric types to the algorithm and to the tree, which they can
-without template template parameters).
-
-There are two important notes about this:
-
- - Not every possible combination of MetricType, StatisticType, and MatType
-necessarily needs to be valid or work correctly for each type of tree.  For
-instance, the QuadTree is limited to Euclidean metrics and will not work
-otherwise.  Either compile-time static checks or detailed documentation can help
-keep users from using invalid combinations of template arguments.
-
- - Some types of trees have more template parameters than just these three.  One
-example is the generalized binary space tree, where the bounding shape of each
-node is easily made into a fourth template parameter (the \c BinarySpaceTree
-class calls this the \c BoundType parameter), and the procedure used to split a
-node is easily made into a fifth template parameter (the \c BinarySpaceTree
-class calls this the \c SplitType parameter).  However, the syntax of template
-template parameters *requires* that the class only has the correct number of
-template parameters---no more, no less.  Fortunately, C++11 allows template
-typedefs (alias templates), which can be used to bind some of a class
-template's arguments in advance:
-
-@code
-// This is the definition of the BinarySpaceTree class, which has five template
-// parameters.
-template<typename MetricType,
-         typename StatisticType,
-         typename MatType,
-         typename BoundType,
-         typename SplitType>
-class BinarySpaceTree;
-
-// The 'using' keyword gives us a template typedef, so we can define the
-// MeanSplitKDTree template class, which has three parameters and is a valid
-// TreeType policy class.
-template<typename MetricType, typename StatisticType, typename MatType>
-using MeanSplitKDTree = BinarySpaceTree<MetricType,
-                                        StatisticType,
-                                        MatType,
-                                        HRectBound<MetricType>,
-                                        MeanSplit<HRectBound<MetricType>,
-                                                  MetricType>>;
-@endcode
-
-Now, the \c MeanSplitKDTree class has only three template parameters and can be
-used as a \c TreeType policy class in various mlpack algorithms.  Many types of
-trees in mlpack have more than three template parameters and rely on template
-typedefs to provide simplified \c TreeType interfaces.
-
-@section treetype_api The TreeType API
-
-As a result of the definition of *space tree* in the previous section, a
-simplified API presents itself quite easily.  However, more complex
-functionality is often necessary in mlpack, so this leads to more functions
-being necessary for a class to satisfy the \c TreeType policy.  Combining this
-with the template parameters required for trees given in the previous section
-gives us the complete API required for a class implementing the \c TreeType
-policy.  Below is the minimal set of functions required with minor
-documentation for each function.  (More extensive documentation and explanation
-is given afterwards.)
-
-@code
-// The three template parameters will be supplied by the user, and are detailed
-// in the previous section.
-template<typename MetricType,
-         typename StatisticType,
-         typename MatType>
-class ExampleTree
-{
- public:
-  //////////////////////
-  //// Constructors ////
-  //////////////////////
-
-  // This batch constructor does not modify the dataset, and builds the entire
-  // tree using a default-constructed MetricType.
-  ExampleTree(const MatType& data);
-
-  // This batch constructor does not modify the dataset, and builds the entire
-  // tree using the given MetricType.
-  ExampleTree(const MatType& data, MetricType& metric);
-
-  // Initialize the tree from a given boost::serialization archive.  SFINAE (the
-  // second argument) is necessary to ensure that the archive is loading, not
-  // saving.
-  template<typename Archive>
-  ExampleTree(
-      Archive& ar,
-      const typename boost::enable_if<typename Archive::is_loading>::type* = 0);
-
-  // Release any resources held by the tree.
-  ~ExampleTree();
-
-  // ///////////////////////// //
-  // // Basic functionality // //
-  // ///////////////////////// //
-
-  // Get the dataset that the tree is built on.
-  const MatType& Dataset();
-
-  // Get the metric that the tree is built with.
-  MetricType& Metric();
-
-  // Get/modify the StatisticType for this node.
-  StatisticType& Stat();
-
-  // Return the parent of the node, or NULL if this is the root.
-  ExampleTree* Parent();
-
-  // Return the number of children held by the node.
-  size_t NumChildren();
-  // Return the i'th child held by the node.
-  ExampleTree& Child(const size_t i);
-
-  // Return the number of points held in the node.
-  size_t NumPoints();
-  // Return the index of the i'th point held in the node.
-  size_t Point(const size_t i);
-
-  // Return the number of descendant nodes of this node.
-  size_t NumDescendantNodes();
-  // Return the i'th descendant node of this node.
-  ExampleTree& DescendantNode(const size_t i);
-
-  // Return the number of descendant points of this node.
-  size_t NumDescendants();
-  // Return the index of the i'th descendant point of this node.
-  size_t Descendant(const size_t i);
-
-  // Store the center of the bounding region of the node in the given vector.
-  void Center(arma::vec& center);
-
-  // ///////////////////////////////////////////////// //
-  // // More complex distance-related functionality // //
-  // ///////////////////////////////////////////////// //
-
-  // Return the distance between the center of this node and the center of
-  // its parent.
-  double ParentDistance();
-
-  // Return an upper bound on the furthest possible distance between the
-  // center of the node and any point held in the node.
-  double FurthestPointDistance();
-
-  // Return an upper bound on the furthest possible distance between the
-  // center of the node and any descendant point of the node.
-  double FurthestDescendantDistance();
-
-  // Return a lower bound on the minimum distance between the center and any
-  // edge of the node's bounding shape.
-  double MinimumBoundDistance();
-
-  // Return a lower bound on the minimum distance between the given point and
-  // the node.
-  template<typename VecType>
-  double MinDistance(VecType& point);
-
-  // Return a lower bound on the minimum distance between the given node and
-  // this node.
-  double MinDistance(ExampleTree& otherNode);
-
-  // Return an upper bound on the maximum distance between the given point and
-  // the node.
-  template<typename VecType>
-  double MaxDistance(VecType& point);
-
-  // Return an upper bound on the maximum distance between the given node and
-  // this node.
-  double MaxDistance(ExampleTree& otherNode);
-
-  // Return the combined results of MinDistance() and MaxDistance().
-  template<typename VecType>
-  math::Range RangeDistance(VecType& point);
-
-  // Return the combined results of MinDistance() and MaxDistance().
-  math::Range RangeDistance(ExampleTree& otherNode);
-
-  // //////////////////////////////////// //
-  // // Serialization (loading/saving) // //
-  // //////////////////////////////////// //
-
-  // Return a string representation of the tree.
-  std::string ToString() const;
-
-  // Serialize the tree (load from the given archive / save to the given
-  // archive, depending on its type).
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int version);
-
- protected:
-  // A default constructor; only meant to be used by boost::serialization.  This
-  // must be protected so that boost::serialization will work; it does not need
-  // to return a valid tree.
-  ExampleTree();
-
-  // Friend access must be given for the default constructor.
-  friend class boost::serialization::access;
-};
-@endcode
-
-Although this is significantly more complex than the four-item definition of
-*space tree* might suggest, it turns out many of these methods are not
-difficult to implement for most reasonable tree types.  It is also important to
-realize that this is a *minimum* API; you may implement more complex tree types
-at your leisure (and you may include more template parameters too, though you
-will have to use template typedefs to provide versions with three parameters;
-see \ref treetype_template_params "the previous section").
-
-Before diving into the detailed documentation for each function, let us consider
-a few important points about the implications of this API:
-
- - **Trees are not default-constructible** and should not (in general) provide
-a default constructor.  This helps prevent invalid trees.  In general, any
-instantiated mlpack object should be valid and ready to use---and a tree built
-on no points is not valid or ready to use.
-
- - **Trees only need to provide batch constructors.**  Although many tree types
-do have algorithms for incremental insertions, in mlpack this is not required
-because the tree-based algorithms that mlpack implements generally assume
-fully-built, non-modifiable trees.  For this purpose, batch construction is
-perfectly sufficient.  (It's also worth pointing out that for some types of
-trees, like kd-trees, the cost of a handful of insertions often outweighs the
-cost of completely rebuilding the tree.)
-
- - **Trees must provide a number of distance bounding functions.**  The utility
-of trees generally stems from the ability to place quick bounds on
-distance-related quantities.  For instance, if all the descendant points of a
-node are bounded by a ball of radius \f$\lambda\f$ and the center of the node
-is a point \f$c\f$, then the minimum distance between some point \f$p\f$ and any
-descendant point of the node is at least the distance between \f$p\f$ and
-\f$c\f$ minus the radius \f$\lambda\f$: \f$d(p, c) - \lambda\f$.  This is a fast
-calculation, and (usually) provides a decent bound on the minimum distance
-between \f$p\f$ and any descendant point of the node.  (A short sketch of how
-such a bound is used appears after this list.)
-
- - **Trees need to be able to be serialized.**  mlpack uses the
-boost::serialization library for saving and loading objects.  Trees---which can
-be a part of machine learning models---therefore must have the ability to be
-saved and loaded.  Making this all work requires a protected constructor (part
-of the API) and generally makes it impossible to hold references instead of
-pointers internally, because if a tree is loaded from a file then it must own
-the dataset it is built on and the metric it uses (this also means that a
-destructor must exist for freeing these resources).
-
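-To make the third point above concrete, here is a minimal sketch of how such a
-bound is typically consumed (the names \c node, \c query, and \c searchRadius
-here are hypothetical):
-
-@code
-// If even the closest possible descendant point is outside the search radius,
-// the entire subtree rooted at 'node' can be pruned without ever visiting its
-// points.
-if (node.MinDistance(query) > searchRadius)
-  return;  // Prune this branch.
-@endcode
-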
-Now, we can consider each part of the API more rigorously.
-
-@section treetype_rigorous Rigorous API documentation
-
-This section is divided into five parts:
-
- - \ref treetype_rigorous_template
- - \ref treetype_rigorous_constructor
- - \ref treetype_rigorous_basic
- - \ref treetype_rigorous_complex
- - \ref treetype_rigorous_serialization
-
-@subsection treetype_rigorous_template Template parameters
-
-\ref treetype_template_params "An earlier section" discussed the three different
-template parameters that are required by the \c TreeType policy.
-
-The \ref metrics "MetricType policy" provides one method that will be useful for
-tree building and other operations:
-
-@code
-// This function is required by the MetricType policy.
-// Evaluate the metric between two points (which may be of different types).
-template<typename VecTypeA, typename VecTypeB>
-double Evaluate(const VecTypeA& a, const VecTypeB& b);
-@endcode
-
-Note that this method is not necessarily static, so a \c MetricType object
-should be held internally and its \c Evaluate() method should be called whenever
-the distance between two points is required.  **It is generally a bad idea to
-hardcode any distance calculation in your tree.**  This will make the tree
-unable to generalize to arbitrary metrics.  If your tree must depend on certain
-assumptions holding about the metric (i.e. the metric is a Euclidean metric),
-then make that clear in the documentation of the tree, so users do not try to
-use the tree with an inappropriate metric.
-
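-A minimal sketch of what this looks like in practice (the member and function
-names here are hypothetical):
-
-@code
-template<typename MetricType, typename StatisticType, typename MatType>
-class ExampleTree
-{
- private:
-  // The metric is held internally, since it may be stateful.
-  MetricType metric;
-
-  double PointToCenterDistance(const arma::vec& point, const arma::vec& center)
-  {
-    // Correct for any MetricType; by contrast, something like
-    // arma::norm(point - center, 2) would silently hardcode the Euclidean
-    // metric.
-    return metric.Evaluate(point, center);
-  }
-};
-@endcode
-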
-The second template parameter, \c StatisticType, is for auxiliary information
-that is required by certain algorithms.  For instance, consider an algorithm
-which repeatedly uses the variance of the descendant points of a node.  It might
-be tempting to add a \c Variance() method to the required \c TreeType API, but
-this quickly leads to code bloat (after all, the API already has quite enough
-functions as it is).  Instead, it is better to create a \c StatisticType class
-which provides the \c Variance() method, and then call \c Stat().Variance() when
-the variance is required.  This also holds true for cached data members.
-
-Each node should have its own instance of a \c StatisticType class.  The
-\c StatisticType must provide the following constructors:
-
-@code
-// Default constructor required by the StatisticType policy.
-StatisticType();
-
-// This constructor is required by the StatisticType policy.
-template<typename TreeType>
-StatisticType(TreeType& node);
-@endcode
-
-This constructor should be called with \c (*this) after the node is constructed
-(usually, this ends up being the last line in the constructor of a node).
-
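-A minimal sketch of a conforming \c StatisticType (the class and member names
-here are hypothetical) that caches the number of descendant points of a node
-at construction time:
-
-@code
-class CountStatistic
-{
- public:
-  // Default constructor, required by the StatisticType policy.
-  CountStatistic() : count(0) { }
-
-  // Construct from a node; the node calls this with (*this), usually as the
-  // last line of its own constructor.
-  template<typename TreeType>
-  CountStatistic(TreeType& node) : count(node.NumDescendants()) { }
-
-  // The cached quantity, available to algorithms via node.Stat().Count().
-  size_t Count() const { return count; }
-
- private:
-  size_t count;
-};
-@endcode
-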
-The last template parameter is the \c MatType parameter.  This is generally
-\c arma::mat or \c arma::sp_mat, but could be any Armadillo type, including
-matrices that hold data points of different precisions (such as \c float or even
-\c int).  It generally suffices to write \c MatType assuming that \c arma::mat
-will be used, since the vast majority of the time this will be what is used.
-
-@subsection treetype_rigorous_constructor Constructors and destructors
-
-The \c TreeType API requires at least three constructors.  Technically, it does
-not *require* a destructor, but your tree class will almost certainly be doing
-some memory management internally, and in that case it should have one.
-
-The first two constructors are variations of the same idea:
-
-@code
-// This batch constructor does not modify the dataset, and builds the entire
-// tree using a default-constructed MetricType.
-ExampleTree(const MatType& data);
-
-// This batch constructor does not modify the dataset, and builds the entire
-// tree using the given MetricType.
-ExampleTree(const MatType& data, MetricType& metric);
-@endcode
-
-All that is required here is that a constructor is available that takes a
-dataset and optionally an instantiated metric.  If no metric is provided, then
-it should be assumed that the \c MetricType class has a default constructor and
-a default-constructed metric should be used.  The constructor *must* return a
-valid, fully-constructed, ready-to-use tree that satisfies the definition
-of *space tree* that was \ref whatistree "given earlier".
-
-It is possible to implement both these constructors as one by using \c
-boost::optional.
-
-The third constructor requires the tree to be initializable from a \c
-boost::serialization archive:
-
-@code
-// Initialize the tree from a given boost::serialization archive.  SFINAE (the
-// second argument) is necessary to ensure that the archive is loading, not
-// saving.
-template<typename Archive>
-ExampleTree(
-    Archive& ar,
-    const typename boost::enable_if<typename Archive::is_loading>::type* = 0);
-@endcode
-
-This has implications for how the tree must be stored.  In this case, the dataset
-is *not yet loaded* and therefore the tree **may be required to have
-ownership of the data matrix**.  This means that realistically the most
-reasonable way to represent the data matrix internally in a tree class is not
-with a reference but instead with a pointer.  If this is true, then a destructor
-will be required:
-
-@code
-// Release any resources held by the tree.
-~ExampleTree();
-@endcode
-
-and, if the data matrix is represented internally with a pointer, this
-destructor will need to release the memory for the data matrix (in the case that
-the tree was created via \c boost::serialization ).
-
-Note that these constructors are not necessarily the only constructors that a
-\c TreeType implementation can provide.  One important example of when more
-constructors are useful is when the tree rearranges points internally; this
-might be desired for the sake of speed or memory optimization.  But to do this
-with the required constructors would necessarily incur a copy of the data
-matrix, because the user will pass a \c "const MatType&".  One alternate
-solution is to provide a constructor which takes an rvalue reference to a
-\c MatType:
-
-@code
-ExampleTree(MatType&& data);
-@endcode
-
-(and another overload that takes an instantiated metric), and then the user can
-use \c std::move() to build the tree without copying the data matrix, although
-the data matrix will be modified:
-
-@code
-ExampleTree exTree(std::move(dataset));
-@endcode
-
-It is, of course, possible to add even more constructors if desired.
-
-@subsection treetype_rigorous_basic Basic tree functionality
-
-The basic functionality of a class implementing the \c TreeType API is quite
-straightforward and intuitive.
-
-@code
-// Get the dataset that the tree is built on.
-const MatType& Dataset();
-@endcode
-
-This should return a \c const reference to the dataset the tree is built on.
-The fact that this function is required essentially means that each node in the
-tree must store a pointer to the dataset (this is not the only option, but it is
-the most obvious option).
-
-@code
-// Get the metric that the tree is built with.
-MetricType& Metric();
-@endcode
-
-Each node must also store an instantiated metric or a pointer to one (note that
-this is required even for metrics that have no state and have a \c static \c
-Evaluate() function).
-
-@code
-// Get/modify the StatisticType for this node.
-StatisticType& Stat();
-@endcode
-
-As discussed earlier, each node must hold a \c StatisticType; this is accessible
-through the \c Stat() function.
-
-@code
-// Return the parent of the node, or NULL if this is the root.
-ExampleTree* Parent();
-
-// Return the number of children held by the node.
-size_t NumChildren();
-// Return the i'th child held by the node.
-ExampleTree& Child(const size_t i);
-
-// Return the number of points held in the node.
-size_t NumPoints();
-// Return the index of the i'th point held in the node.
-size_t Point(const size_t i);
-
-// Return the number of descendant nodes of this node.
-size_t NumDescendantNodes();
-// Return the i'th descendant node of this node.
-ExampleTree& DescendantNode(const size_t i);
-
-// Return the number of descendant points of this node.
-size_t NumDescendants();
-// Return the index of the i'th descendant point of this node.
-size_t Descendant(const size_t i);
-@endcode
-
-These functions are all fairly self-explanatory.  Most algorithms will use the
-\c Parent(), \c Child(), \c NumChildren(), \c Point(), and \c NumPoints()
-functions, so care should be taken when implementing those functions to ensure
-they will be efficient.  Note that \c Point() and \c Descendant() should return
-indices of points, so the actual points can be accessed by calling
-\c "Dataset().col(Point(i))" for some index \c i (or something similar).
-
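-For instance, here is a minimal sketch that computes the mean of the points
-held directly in some node \c node (not its descendants), assuming \c MatType
-is \c arma::mat:
-
-@code
-arma::vec mean = arma::zeros<arma::vec>(node.Dataset().n_rows);
-for (size_t i = 0; i < node.NumPoints(); ++i)
-  mean += node.Dataset().col(node.Point(i));
-if (node.NumPoints() > 0)
-  mean /= node.NumPoints();
-@endcode
-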
-An important note about the \c Descendant() function is that each descendant
-point should be unique.  So if a node holds the point with index 6 and it has
-one child that holds the points with indices 6 and 7, then \c NumDescendants()
-should return 2, not 3.  The ordering in which the descendants are returned can
-be arbitrary; so, \c Descendant(0) can return 6 \b or 7, and \c Descendant(1)
-should return the other index.
-
-@code
-// Store the center of the bounding region of the node in the given vector.
-void Center(arma::vec& center);
-@endcode
-
-The last function, \c Center(), should calculate the center of the bounding
-shape and store it in the given vector.  So, for instance, if the tree is a ball
-tree, then the center is simply the center of the ball.  Algorithm writers would
-be wise to try and avoid the use of \c Center() if possible, since it will
-necessarily cost a copy of a vector.
-
-@subsection treetype_rigorous_complex Complex tree functionality and bounds
-
-A node in a tree should also be able to calculate various distance-related
-bounds; these are particularly useful in tree-based algorithms.  Note that none
-of these bounds needs to be maximally tight; generally it is more important
-that each bound can be calculated easily.
-
-Details on each bounding function that the \c TreeType API requires are given
-below.
-
-@code
-// Return the distance between the center of this node and the center of
-// its parent.
-double ParentDistance();
-@endcode
-
-Remember that each node corresponds to some region in the space that the dataset
-lies in.  For most tree types this shape is often something geometrically
-simple: a ball, a cone, a hyperrectangle, a slice, or something similar.  The
-\c ParentDistance() function should return the distance between the center of
-this node's region and the center of the parent node's region.
-
-In practice this bound is often used in dual-tree (or single-tree) algorithms to
-place an easy \c MinDistance() (or \c MaxDistance() ) bound for a child node;
-the parent's \c MinDistance() (or \c MaxDistance() ) function is called and then
-adjusted with \c ParentDistance() to provide a possibly loose but efficient
-bound on what the result of \c MinDistance() (or \c MaxDistance() ) would be
-with the child.
-
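-A minimal sketch of that adjustment (the names here are hypothetical;
-\c queryToCenter is the already-computed distance between the query point and
-the center of \c node):
-
-@code
-// Triangle inequality: d(q, childCenter) >= d(q, center) - ParentDistance().
-const double childCenterBound =
-    queryToCenter - node.Child(i).ParentDistance();
-// Every descendant of the child lies within FurthestDescendantDistance() of
-// the child's center, so this is a valid (possibly loose) lower bound, and no
-// extra call to Metric().Evaluate() is needed.
-const double childMinBound = std::max(0.0,
-    childCenterBound - node.Child(i).FurthestDescendantDistance());
-@endcode
-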
-@code
-// Return an upper bound on the furthest possible distance between the
-// center of the node and any point held in the node.
-double FurthestPointDistance();
-
-// Return an upper bound on the furthest possible distance between the
-// center of the node and any descendant point of the node.
-double FurthestDescendantDistance();
-@endcode
-
-It is often very useful to be able to bound the radius of a node, which is
-effectively what \c FurthestDescendantDistance() does.  Often it is easiest to
-simply calculate and cache the furthest descendant distance at tree construction
-time.  Some trees, such as the cover tree, are able to give guarantees that the
-points held in the node will necessarily be closer than the descendant points;
-therefore, the \c FurthestPointDistance() function is also useful.
-
-It is permissible to simply have \c FurthestPointDistance() return the result of
-\c FurthestDescendantDistance(), and that will still be a valid bound, but
-depending on the type of tree it may be possible to have \c
-FurthestPointDistance() return a tighter bound.
-
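-A minimal sketch of that permissible fallback, assuming the furthest
-descendant distance was cached at construction time in a hypothetical member
-\c furthestDescendantDistance:
-
-@code
-double ExampleTree::FurthestDescendantDistance()
-{
-  return furthestDescendantDistance;  // Cached at construction time.
-}
-
-double ExampleTree::FurthestPointDistance()
-{
-  // Valid for any tree, though a tighter bound may be possible depending on
-  // the tree type.
-  return FurthestDescendantDistance();
-}
-@endcode
-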
-@code
-// Return a lower bound on the minimum distance between the center and any
-// edge of the node's bounding shape.
-double MinimumBoundDistance();
-@endcode
-
-This is, admittedly, a somewhat complex and unusual quantity.  It is one of the
-less important bounding functions, so it is valid to simply return 0.
-
-The bound is a bound on the minimum distance between the center of the node and
-any edge of the shape that bounds all of the descendants of the node.  So, if
-the bounding shape is a ball (as in a ball tree or a cover tree), then
-\c MinimumBoundDistance() should just return the radius of the ball.  If the
-bounding shape is a hypercube (as in a generalized octree), then
-\c MinimumBoundDistance() should return the side length divided by two.  If the
-bounding shape is a hyperrectangle (as in a kd-tree or a spill tree), then
-\c MinimumBoundDistance() should return half the side length of the
-hyperrectangle's smallest side.
-
-@code
-// Return a lower bound on the minimum distance between the given point and
-// the node.
-template<typename VecType>
-double MinDistance(VecType& point);
-
-// Return a lower bound on the minimum distance between the given node and
-// this node.
-double MinDistance(ExampleTree& otherNode);
-
-// Return an upper bound on the maximum distance between the given point and
-// the node.
-template<typename VecType>
-double MaxDistance(VecType& point);
-
-// Return an upper bound on the maximum distance between the given node and
-// this node.
-double MaxDistance(ExampleTree& otherNode);
-
-// Return the combined results of MinDistance() and MaxDistance().
-template<typename VecType>
-math::Range RangeDistance(VecType& point);
-
-// Return the combined results of MinDistance() and MaxDistance().
-math::Range RangeDistance(ExampleTree& otherNode);
-@endcode
-
-These six functions are almost without a doubt the most important functionality
-of a tree.  Therefore, it is preferable that these methods be implemented as
-efficiently as possible, as they may potentially be called many millions of
-times in a tree-based algorithm.  It is also preferable that these bounds be as
-tight as possible.  In tree-based algorithms, these are used for pruning away
-work, and tighter bounds mean that more pruning is possible.
-
-Of these six functions, there are only really two bounds that are desired here:
-the *minimum distance* between a node and an object, and the *maximum distance*
-between a node and an object.  The object may be either a vector (usually \c
-arma::vec ) or another tree node.
-
-Consider the first case, where the object is a vector.  The result of
-\c MinDistance() needs to be less than or equal to the true minimum distance,
-which could be calculated as below:
-
-@code
-// We assume that we have a vector 'vec', and a tree node 'node'.
-double trueMinDist = DBL_MAX;
-for (size_t i = 0; i < node.NumDescendants(); ++i)
-{
-  const double dist = node.Metric().Evaluate(vec,
-      node.Dataset().col(node.Descendant(i)));
-  if (dist < trueMinDist)
-    trueMinDist = dist;
-}
-// At the end of the loop, trueMinDist will hold the true minimum distance
-// between 'vec' and any descendant point of 'node'.
-@endcode
-
-Often the bounding shape of a node will allow a quick calculation that will make
-a reasonable bound.  For instance, if the node's bounding shape is a ball with
-radius \c r and center \c ctr, the calculation is simply
-\c "(node.Metric().Evaluate(vec, ctr) - r)".  Usually a good \c MinDistance() or
-\c MaxDistance() function will make only one call to the \c Evaluate() function
-of the metric.
-
-The \c RangeDistance() function allows a way for both bounds to be calculated at
-once.  It is possible to implement this as a call to \c MinDistance() followed
-by a call to \c MaxDistance(), but this may incur more metric \c Evaluate()
-calls than necessary.  Often calculating both bounds at once can be more
-efficient and can be done with fewer \c Evaluate() calls than calling both
-\c MinDistance() and \c MaxDistance().
-
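-A minimal sketch for a ball-shaped bound (with hypothetical members \c center,
-\c radius, and an internally held \c metric), where a single \c Evaluate()
-call produces both bounds at once:
-
-@code
-template<typename VecType>
-math::Range ExampleTree::RangeDistance(VecType& point)
-{
-  // One metric evaluation serves both the lower and the upper bound.
-  const double centerDistance = metric.Evaluate(point, center);
-  return math::Range(std::max(0.0, centerDistance - radius),
-                     centerDistance + radius);
-}
-@endcode
-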
-@subsection treetype_rigorous_serialization Serialization
-
-The last two public functions that the \c TreeType API requires are related to
-serialization and printing.
-
-@code
-// Return a string representation of the tree.
-std::string ToString() const;
-@endcode
-
-There are few restrictions on the precise way that the \c ToString() function
-should operate, but generally it should behave similarly to the \c ToString()
-function in other mlpack methods.  Generally, a user will call \c ToString()
-when they want to inspect the object and see what it looks like.  For a tree,
-printing the entire tree may be far more information than the user was
-expecting, so it may be a better option to print either only the node itself or
-the node plus one or two levels of children.
-
-@code
-// Serialize the tree (load from the given archive / save to the given
-// archive, depending on its type).
-template<typename Archive>
-void Serialize(Archive& ar, const unsigned int version);
-
-protected:
-// A default constructor; only meant to be used by boost::serialization.  This
-// must be protected so that boost::serialization will work; it does not need
-// to return a valid tree.
-ExampleTree();
-
-// Friend access must be given for the default constructor.
-friend class boost::serialization::access;
-@endcode
-
-On the other hand, the specifics of the functionality required for the
-\c Serialize() function are somewhat more difficult.  The \c Serialize()
-function will be called either when a tree is being saved to disk or loaded from
-disk.  The \c boost::serialization documentation is fairly comprehensive, but
-when writing a \c Serialize() method for mlpack trees you should use
-\c data::CreateNVP() instead of \c BOOST_SERIALIZATION_NVP().  This is because
-mlpack classes implement \c Serialize() instead of \c serialize() in order to
-conform to the mlpack style guidelines, and making this work requires some
-interesting shim code, which is hidden inside of \c data::CreateNVP().  It may
-be useful to look at other \c Serialize() methods contained in other mlpack
-classes as an example.
-
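-A minimal sketch of such a method (the members serialized here are
-hypothetical):
-
-@code
-template<typename Archive>
-void ExampleTree::Serialize(Archive& ar, const unsigned int /* version */)
-{
-  using data::CreateNVP;
-
-  // Each member is wrapped in a name-value pair, just as
-  // BOOST_SERIALIZATION_NVP() would be used in a plain serialize() method.
-  ar & CreateNVP(dataset, "dataset");
-  ar & CreateNVP(stat, "stat");
-  ar & CreateNVP(children, "children");
-}
-@endcode
-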
-An important note is that it is very difficult to use references with
-\c boost::serialization, because \c Serialize() may be called at any time during
-the object's lifetime, and references cannot be re-seated.  In general this will
-require the use of pointers, which then require manual memory management.
-Therefore, be careful that \c Serialize() (and the tree's destructor) properly
-handle memory management!
-
-@section treetype_traits The TreeTraits trait class
-
-Some tree-based algorithms can specialize if the tree fulfills certain
-conditions.  For instance, if the regions represented by two sibling nodes
-cannot overlap, an algorithm may be able to perform a simpler computation.
-Based on this reasoning, the \c TreeTraits trait class (much like the
-mlpack::kernel::KernelTraits class) exists in order to allow a tree to specify
-(via a \c const \c static \c bool) when these types of conditions are
-satisfied.  **Note that a TreeTraits class is not required,** but may be
-helpful.
-
-The \c TreeTraits trait class is a template class that takes a \c TreeType as a
-parameter, and exposes \c const \c static \c bool values that depend on the
-tree.  Setting these values is achieved by specialization.  The code below shows
-the default \c TreeTraits values (these are the values that will be used if no
-specialization is provided for a given \c TreeType).
-
-@code
-template<typename TreeType>
-class TreeTraits
-{
- public:
-  // This is true if the subspaces represented by the children of a node can
-  // overlap.
-  static const bool HasOverlappingChildren = true;
-
-  // This is true if Point(0) is the centroid of the node.
-  static const bool FirstPointIsCentroid = false;
-
-  // This is true if the points contained in the first child of a node
-  // (Child(0)) are also contained in that node.
-  static const bool HasSelfChildren = false;
-
-  // This is true if the tree rearranges points in the dataset when it is built.
-  static const bool RearrangesDataset = false;
-
-  // This is true if the tree always has only two children.
-  static const bool BinaryTree = false;
-};
-@endcode
-
-An example specialization for the \ref mlpack::tree::KDTree class is given
-below.  Note that \ref mlpack::tree::KDTree is itself a template class (like
-every class satisfying the \c TreeType policy), so we are specializing to a
-template parameter.
-
-@code
-template<typename MetricType,
-         typename StatisticType,
-         typename MatType>
-class TreeTraits<KDTree<MetricType, StatisticType, MatType>>
-{
- public:
-  // The regions represented by the two children of a node may not overlap.
-  static const bool HasOverlappingChildren = false;
-
-  // There is no guarantee that the first point of a node is the centroid.
-  static const bool FirstPointIsCentroid = false;
-
-  // Points are not contained at multiple levels (only at the leaves).
-  static const bool HasSelfChildren = false;
-
-  // Points are rearranged during the building of the tree.
-  static const bool RearrangesDataset = true;
-
-  // The tree is always binary.
-  static const bool BinaryTree = true;
-};
-@endcode
-
-Currently, the traits available are each of the five detailed above.  For more
-information, see the \ref mlpack::tree::TreeTraits documentation.
-
-@section treetype_more A list of trees in mlpack and more information
-
-mlpack contains several ready-to-use implementations of trees that satisfy the
-TreeType policy API:
-
- - mlpack::tree::KDTree
- - mlpack::tree::MeanSplitKDTree
- - mlpack::tree::BallTree
- - mlpack::tree::MeanSplitBallTree
- - mlpack::tree::RTree
- - mlpack::tree::RStarTree
- - mlpack::tree::StandardCoverTree
-
-Often, these are template typedefs of more flexible tree classes:
-
- - mlpack::tree::BinarySpaceTree -- binary trees, such as the KD-tree and ball
-   tree
- - mlpack::tree::RectangleTree -- the R tree and variants
- - mlpack::tree::CoverTree -- the cover tree and variants
-
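-Any of these can be built directly.  A minimal sketch that constructs a
-kd-tree on random data, spelling out all three required template parameters
-(in typical usage, an algorithm's \c TreeType template parameter fills these
-in automatically):
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/core/tree/binary_space_tree.hpp>
-
-using namespace mlpack;
-
-int main()
-{
-  // 1000 random points in 10 dimensions.
-  arma::mat data = arma::randu<arma::mat>(10, 1000);
-
-  // Build the tree.  Note that KDTree rearranges the dataset (see the
-  // RearrangesDataset trait above), so the matrix passed here is copied.
-  tree::KDTree<metric::EuclideanDistance, tree::EmptyStatistic, arma::mat>
-      kdTree(data);
-}
-@endcode
-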
-*/
diff -pruN 3.4.2-7/doc/quickstart/cli.md 4.0.1-1/doc/quickstart/cli.md
--- 3.4.2-7/doc/quickstart/cli.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/quickstart/cli.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,180 @@
+# mlpack command-line quickstart guide
+
+This page describes how you can quickly get started using mlpack from the
+command line, gives a few examples of usage, and points to deeper
+documentation.
+
+This quickstart guide is also available for [C++](cpp.md), [Python](python.md),
+[R](R.md), [Julia](julia.md), and [Go](go.md).
+
+## Installing mlpack
+
+Installing mlpack is straightforward and can be done with your system's package
+manager.  For instance, for Ubuntu or Debian the command is simply
+
+```sh
+sudo apt-get install mlpack-bin
+```
+
+On Fedora or Red Hat:
+
+```sh
+sudo dnf install mlpack
+```
+
+If you use a different distribution, mlpack may be packaged under a different
+name.  And if it is not packaged, you can use a Docker image from Dockerhub:
+
+```sh
+docker run -it mlpack/mlpack /bin/bash
+```
+
+This Docker image has mlpack's command-line bindings already built and
+installed.
+
+If you prefer to build mlpack from scratch, see the
+[main README](../../README.md).
+
+## Simple quickstart example
+
+As a really simple example of how to use mlpack from the command-line, let's do
+some simple classification on a subset of the standard machine learning
+`covertype` dataset.  We'll first split the dataset into a training set and a
+testing set, then we'll train an mlpack random forest on the training data, and
+finally we'll print the accuracy of the random forest on the test dataset.
+
+You can copy-paste this code directly into your shell to run it.
+
+```sh
+# Get the dataset and unpack it.
+wget https://www.mlpack.org/datasets/covertype-small.data.csv.gz
+wget https://www.mlpack.org/datasets/covertype-small.labels.csv.gz
+gunzip covertype-small.data.csv.gz covertype-small.labels.csv.gz
+
+# Split the dataset; 70% into a training set and 30% into a test set.
+# Each of these options has a shorthand single-character option but here we type
+# it all out for clarity.
+mlpack_preprocess_split                                       \
+    --input_file covertype-small.data.csv                     \
+    --input_labels_file covertype-small.labels.csv            \
+    --training_file covertype-small.train.csv                 \
+    --training_labels_file covertype-small.train.labels.csv   \
+    --test_file covertype-small.test.csv                      \
+    --test_labels_file covertype-small.test.labels.csv        \
+    --test_ratio 0.3                                          \
+    --verbose
+
+# Train a random forest.
+mlpack_random_forest                                  \
+    --training_file covertype-small.train.csv         \
+    --labels_file covertype-small.train.labels.csv    \
+    --num_trees 10                                    \
+    --minimum_leaf_size 3                             \
+    --print_training_accuracy                         \
+    --output_model_file rf-model.bin                  \
+    --verbose
+
+# Now predict the labels of the test points and print the accuracy.
+# Also, save the test set predictions to the file 'predictions.csv'.
+mlpack_random_forest                                    \
+    --input_model_file rf-model.bin                     \
+    --test_file covertype-small.test.csv                \
+    --test_labels_file covertype-small.test.labels.csv  \
+    --predictions_file predictions.csv                  \
+    --verbose
+```
+
+We can see by looking at the output that we achieve reasonably good accuracy on
+the test dataset (80%+).  The file `predictions.csv` could also be used by
+other tools; for instance, we can easily calculate the number of points that
+were predicted incorrectly:
+
+```sh
+diff -U 0 predictions.csv covertype-small.test.labels.csv | grep '^@@' | wc -l
+```
+
+It's easy to modify the code above to do more complex things, or to use
+different mlpack learners, or to interface with other machine learning toolkits.
+
+## Using mlpack for movie recommendations
+
+In this example, we'll train a collaborative filtering model using mlpack's
+`mlpack_cf` program.  We'll train this on the
+[MovieLens dataset](https://grouplens.org/datasets/movielens/), and then we'll
+use the model that we train to give recommendations.
+
+You can copy-paste this code directly into the command line to run it.
+
+```sh
+wget https://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz
+wget https://www.mlpack.org/datasets/ml-20m/movies.csv.gz
+gunzip ratings-only.csv.gz
+gunzip movies.csv.gz
+
+# Hold out 10% of the dataset into a test set so we can evaluate performance.
+mlpack_preprocess_split                 \
+    --input_file ratings-only.csv       \
+    --training_file ratings-train.csv   \
+    --test_file ratings-test.csv        \
+    --test_ratio 0.1                    \
+    --verbose
+
+# Train the model.  Change the rank to increase/decrease the complexity of the
+# model.
+mlpack_cf                             \
+    --training_file ratings-train.csv \
+    --test_file ratings-test.csv      \
+    --rank 10                         \
+    --algorithm RegSVD                \
+    --output_model_file cf-model.bin  \
+    --verbose
+
+# Now query the top 10 movies for user 1.
+echo "1" > query.csv;
+mlpack_cf                             \
+    --input_model_file cf-model.bin   \
+    --query_file query.csv            \
+    --recommendations 10              \
+    --output_file recommendations.csv \
+    --verbose
+
+# Get the names of the movies for user 1.
+echo "Recommendations for user 1:"
+for i in `seq 1 10`; do
+    item=`cat recommendations.csv | awk -F',' '{ print $'$i' }'`;
+    head -n $(($item + 2)) movies.csv | tail -1 | \
+        sed 's/^[^,]*,[^,]*,//' | \
+        sed 's/\(.*\),.*$/\1/' | sed 's/"//g';
+done
+```
+
+Here is some example output, showing that user 1 seems to have good taste in
+movies:
+
+```
+Recommendations for user 1:
+Casablanca (1942)
+Pan's Labyrinth (Laberinto del fauno, El) (2006)
+Godfather, The (1972)
+Answer This! (2010)
+Life Is Beautiful (La Vita è bella) (1997)
+Adventures of Tintin, The (2011)
+Dark Knight, The (2008)
+Out for Justice (1991)
+Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
+Schindler's List (1993)
+```
+
+## Next steps with mlpack
+
+Now that you have done some simple work with mlpack, you have seen how it can
+easily plug into a data science production workflow for the command line.  But
+these two examples have only shown a little bit of the functionality of mlpack.
+Lots of other commands are available with different functionality.  A full list
+of commands and full documentation for each can be found on the following page:
+
+ - [CLI program documentation](https://www.mlpack.org/doc/stable/cli_documentation.html)
+
+Also, mlpack is much more flexible from C++ and allows much greater
+functionality.  So, more complicated tasks are possible if you are willing to
+write C++.  To get started learning about mlpack in C++, the
+[C++ quickstart](cpp.md) is a good place to start.
diff -pruN 3.4.2-7/doc/quickstart/cpp.md 4.0.1-1/doc/quickstart/cpp.md
--- 3.4.2-7/doc/quickstart/cpp.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/quickstart/cpp.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,280 @@
+# mlpack in C++ quickstart
+
+This page describes how you can quickly get started using mlpack in C++, gives
+a few examples of usage, and points to deeper documentation.
+
+Keep in mind that mlpack also has interfaces to other languages, and quickstart
+guides for those other languages are available too.  If that is what you are
+looking for, see the quickstarts for [Python](python.md),
+[the command line](cli.md), [Julia](julia.md), [R](R.md), or [Go](go.md).
+
+## Installing mlpack
+
+To use mlpack in C++, you only need the header files associated with the
+libraries, and the dependencies Armadillo and ensmallen (detailed in the
+[main README](../../README.md)).  The headers may already be pre-packaged for
+your distribution; for instance, for Ubuntu and Debian you can simply run the
+command
+
+```sh
+sudo apt-get install mlpack-dev
+```
+
+and on Fedora or Red Hat:
+
+```sh
+sudo dnf install mlpack
+```
+
+If you run a different distribution, mlpack may be packaged under a different
+name.  And if it is not packaged, you can use a Docker image from Dockerhub:
+
+```sh
+docker run -it mlpack/mlpack /bin/bash
+```
+
+This Docker image has mlpack headers already installed.
+
+If you prefer to build mlpack from scratch, see the
+[main README](../../README.md).
+
+## Installing mlpack from vcpkg
+
+The mlpack port in vcpkg is kept up to date by Microsoft team members and
+community contributors; the vcpkg repository is at
+https://github.com/Microsoft/vcpkg.  You can download and install mlpack using
+the vcpkg dependency manager:
+
+```shell
+git clone https://github.com/Microsoft/vcpkg.git
+cd vcpkg
+./bootstrap-vcpkg.sh  # ./bootstrap-vcpkg.bat for Windows
+./vcpkg integrate install
+./vcpkg install mlpack
+```
+
+If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
+
+## Simple quickstart example
+
+As a really simple example of how to use mlpack in C++, let's do some simple
+classification on a subset of the standard machine learning `covertype` dataset.
+We'll first split the dataset into a training set and a test set, then we'll
+train an mlpack random forest on the training data, and finally we'll print the
+accuracy of the random forest on the test dataset.
+
+The first step is to download the covertype dataset onto your system so that it
+is available for the program.  The shell commands below will do this:
+
+```sh
+# Get the dataset and unpack it.
+wget https://www.mlpack.org/datasets/covertype-small.data.csv.gz
+wget https://www.mlpack.org/datasets/covertype-small.labels.csv.gz
+gunzip covertype-small.data.csv.gz covertype-small.labels.csv.gz
+```
+
+With that in place, let's write a C++ program to split the data and perform the
+classification:
+
+```c++
+// Define these to print extra informational output and warnings.
+#define MLPACK_PRINT_INFO
+#define MLPACK_PRINT_WARN
+#include <mlpack.hpp>
+
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+
+int main()
+{
+  // Load the datasets.
+  mat dataset;
+  Row<size_t> labels;
+  if (!data::Load("covertype-small.data.csv", dataset))
+    throw std::runtime_error("Could not read covertype-small.data.csv!");
+  if (!data::Load("covertype-small.labels.csv", labels))
+    throw std::runtime_error("Could not read covertype-small.labels.csv!");
+
+  // Labels are 1-7, but we want 0-6 (we are 0-indexed in C++).
+  labels -= 1;
+
+  // Now split the dataset into a training set and test set, using 30% of the
+  // dataset for the test set.
+  mat trainDataset, testDataset;
+  Row<size_t> trainLabels, testLabels;
+  data::Split(dataset, labels, trainDataset, testDataset, trainLabels,
+      testLabels, 0.3);
+
+  // Create the RandomForest object and train it on the training data.
+  RandomForest r(trainDataset,
+                 trainLabels,
+                 7 /* number of classes */,
+                 10 /* number of trees */,
+                 3 /* minimum leaf size */);
+
+  // Compute and print the training error.
+  Row<size_t> trainPredictions;
+  r.Classify(trainDataset, trainPredictions);
+  const double trainError =
+      arma::accu(trainPredictions != trainLabels) * 100.0 / trainLabels.n_elem;
+  cout << "Training error: " << trainError << "%." << endl;
+
+  // Now compute predictions on the test points.
+  Row<size_t> testPredictions;
+  r.Classify(testDataset, testPredictions);
+  const double testError =
+      arma::accu(testPredictions != testLabels) * 100.0 / testLabels.n_elem;
+  cout << "Test error: " << testError << "%." << endl;
+}
+```
+
+Now, you can compile the program with your favorite C++ compiler; here's an
+example command that uses `g++`, and assumes the file above is saved as
+`cpp_quickstart_1.cpp`.
+
+```sh
+g++ -O3 -std=c++14 -o cpp_quickstart_1 cpp_quickstart_1.cpp -larmadillo -fopenmp
+```
+
+Then, you can run the program easily:
+
+```sh
+./cpp_quickstart_1
+```
+
+We can see by looking at the output that we achieve reasonably good accuracy on
+the test dataset (80%+):
+
+```
+Training error: 19.4329%.
+Test error: 24.17%.
+```
+
+It's easy to modify the code above to do more complex things, or to use
+different mlpack learners, or to interface with other machine learning toolkits.
+
+## Using mlpack for movie recommendations
+
+In this example, we'll train a collaborative filtering model using mlpack's `CF`
+class.  We'll train this on this
+[MovieLens dataset](https://grouplens.org/datasets/movielens/), and then we'll
+use the model that we train to give recommendations.
+
+First, download the MovieLens dataset:
+
+```sh
+wget https://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz
+wget https://www.mlpack.org/datasets/ml-20m/movies.csv.gz
+gunzip ratings-only.csv.gz movies.csv.gz
+```
+
+Next, we can use the following C++ code:
+
+```cpp
+// Define these to print extra informational output and warnings.
+#define MLPACK_PRINT_INFO
+#define MLPACK_PRINT_WARN
+#include <mlpack.hpp>
+
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+
+int main()
+{
+  // Load the ratings.
+  mat ratings;
+  if (!data::Load("ratings-only.csv", ratings))
+    throw std::runtime_error("Could not load ratings-only.csv!");
+  // Now, load the names of the movies as a single-feature categorical dataset.
+  // We can use `moviesInfo.UnmapString(i, 2)` to get the title of the i'th
+  // movie.
+  data::DatasetInfo moviesInfo;
+  mat movies; // This will be unneeded.
+  if (!data::Load("movies.csv", movies, moviesInfo))
+    throw std::runtime_error("Could not load movies.csv!");
+
+  // Split the ratings into a training set and a test set, using 10% of the
+  // dataset for the test set.
+  mat trainRatings, testRatings;
+  data::Split(ratings, trainRatings, testRatings, 0.1);
+
+  // Train the CF model using RegularizedSVD as the decomposition algorithm.
+  // Here we use a rank of 10 for the decomposition.
+  CFType<RegSVDPolicy> cf(
+      trainRatings,
+      RegSVDPolicy(),
+      5, /* number of users to use for similarity computations */
+      10 /* rank of decomposition */);
+
+  // Now compute the RMSE for the test set user and item combinations.  To do
+  // this we must assemble the list of users and items.
+  Mat<size_t> combinations(2, testRatings.n_cols);
+  for (size_t i = 0; i < testRatings.n_cols; ++i)
+  {
+    combinations(0, i) = size_t(testRatings(0, i)); // (user)
+    combinations(1, i) = size_t(testRatings(1, i)); // (item)
+  }
+  vec predictions;
+  cf.Predict(combinations, predictions);
+  const double rmse = norm(predictions - testRatings.row(2).t(), 2) /
+      sqrt((double) testRatings.n_cols);
+  std::cout << "RMSE of trained model is " << rmse << "." << endl;
+
+  // Compute the top 10 movies for user 1.
+  Col<size_t> users = { 1 };
+  Mat<size_t> recommendations;
+  cf.GetRecommendations(10, recommendations, users);
+
+  // Now print each movie.
+  cout << "Recommendations for user 1:" << endl;
+  for (size_t i = 0; i < recommendations.n_elem; ++i)
+  {
+    cout << "  " << (i + 1) << ": "
+        << moviesInfo.UnmapString(recommendations[i], 2) << endl;
+  }
+}
+```
+
+This can be compiled the same way as before, assuming the code is saved as
+`cpp_quickstart_2.cpp`:
+
+```sh
+g++ -O3 -std=c++14 -o cpp_quickstart_2 cpp_quickstart_2.cpp -fopenmp -larmadillo
+```
+
+And then it can be easily run:
+
+```sh
+./cpp_quickstart_2
+```
+
+Here is some example output, showing that user 1 seems to have good taste in
+movies:
+
+```
+RMSE of trained model is 0.795323.
+Recommendations for user 1:
+  1: Casablanca (1942)
+  2: Pan's Labyrinth (Laberinto del fauno, El) (2006)
+  3: Godfather, The (1972)
+  4: Answer This! (2010)
+  5: Life Is Beautiful (La Vita è bella) (1997)
+  6: Adventures of Tintin, The (2011)
+  7: Dark Knight, The (2008)
+  8: Out for Justice (1991)
+  9: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
+  10: Schindler's List (1993)
+```
+
+## Next steps with mlpack
+
+Now that you have done some simple work with mlpack, you have seen how it can
+easily plug into a data science production workflow in C++.  But these two
+examples have only shown a little bit of the functionality of mlpack.  Lots of
+other functionality is available.
+
+Some of this functionality is demonstrated in the
+[examples repository](https://github.com/mlpack/examples).
+
+A full list of all classes and functions that mlpack implements can be found by
+browsing the well-commented source code.
diff -pruN 3.4.2-7/doc/quickstart/go.md 4.0.1-1/doc/quickstart/go.md
--- 3.4.2-7/doc/quickstart/go.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/quickstart/go.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,201 @@
+# mlpack in Go quickstart guide
+
+This page describes how you can quickly get started using mlpack from Go, gives
+a few examples of usage, and points to deeper documentation.
+
+This quickstart guide is also available for [C++](cpp.md), [Python](python.md),
+[Julia](julia.md), [the command line](cli.md), and [R](R.md).
+
+## Installing mlpack
+
+Installing the mlpack bindings for Go is somewhat time-consuming as the library
+must be built; you can run the following code:
+
+```sh
+go get -u -d mlpack.org/v1/mlpack
+cd ${GOPATH}/src/mlpack.org/v1/mlpack
+make install
+```
+
+Building the Go bindings from scratch is a little more in-depth, though.  For
+information on that, follow the instructions in the
+[main README](../../README.md).
+
+## Simple mlpack quickstart example
+
+As a really simple example of how to use mlpack from Go, let's do some
+simple classification on a subset of the standard machine learning `covertype`
+dataset.  We'll first split the dataset into a training set and a testing set,
+then we'll train an mlpack random forest on the training data, and finally we'll
+print the accuracy of the random forest on the test dataset.
+
+You can copy-paste this code directly into `main.go` to run it.
+
+```go
+package main
+
+import (
+  "mlpack.org/v1/mlpack"
+  "fmt"
+)
+
+func main() {
+  // Download dataset.
+  mlpack.DownloadFile("https://www.mlpack.org/datasets/covertype-small.data.csv.gz",
+                      "data.csv.gz")
+  mlpack.DownloadFile("https://www.mlpack.org/datasets/covertype-small.labels.csv.gz",
+                      "labels.csv.gz")
+
+  // Extract/Unzip the dataset.
+  mlpack.UnZip("data.csv.gz", "data.csv")
+  dataset, _ := mlpack.Load("data.csv")
+
+  mlpack.UnZip("labels.csv.gz", "labels.csv")
+  labels, _ := mlpack.Load("labels.csv")
+
+  // Split the dataset using mlpack.
+  params := mlpack.PreprocessSplitOptions()
+  params.InputLabels = labels
+  params.TestRatio = 0.3
+  params.Verbose = true
+  test, test_labels, train, train_labels :=
+      mlpack.PreprocessSplit(dataset, params)
+
+  // Train a random forest.
+  rf_params := mlpack.RandomForestOptions()
+  rf_params.NumTrees = 10
+  rf_params.MinimumLeafSize = 3
+  rf_params.PrintTrainingAccuracy = true
+  rf_params.Training = train
+  rf_params.Labels = train_labels
+  rf_params.Verbose = true
+  rf_model, _, _ := mlpack.RandomForest(rf_params)
+
+  // Predict the labels of the test points.
+  rf_params_2 := mlpack.RandomForestOptions()
+  rf_params_2.Test = test
+  rf_params_2.InputModel = &rf_model
+  rf_params_2.Verbose = true
+  _, predictions, _ := mlpack.RandomForest(rf_params_2)
+
+  // Now print the accuracy.
+  rows, _ := predictions.Dims()
+  sum := 0
+  for i := 0; i < rows; i++ {
+    if predictions.At(i, 0) == test_labels.At(i, 0) {
+      sum++
+    }
+  }
+  fmt.Print(sum, " correct out of ", rows, " (",
+      (float64(sum) / float64(rows)) * 100, "%).\n")
+}
+```
+
+We can see that we achieve reasonably good accuracy on the test dataset (80%+);
+if we use the full `covertype.csv.gz`, the accuracy should increase
+significantly (but training will take longer).
+
+It's easy to modify the code above to do more complex things, or to use
+different mlpack learners, or to interface with other machine learning toolkits.
+
+## Using mlpack for movie recommendations
+
+In this example, we'll train a collaborative filtering model using mlpack's
+[`cf()`](https://www.mlpack.org/doc/stable/go_documentation.html#cf) method.
+We'll train this on the
+[MovieLens dataset](https://grouplens.org/datasets/movielens/), and then we'll
+use the model that we train to give recommendations.
+
+You can copy-paste this code directly into `main.go` to run it.
+
+```go
+package main
+
+import (
+  "github.com/frictionlessdata/tableschema-go/csv"
+  "mlpack.org/v1/mlpack"
+  "gonum.org/v1/gonum/mat"
+  "fmt"
+)
+
+func main() {
+  // Download dataset.
+  mlpack.DownloadFile("https://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz",
+                      "ratings-only.csv.gz")
+  mlpack.DownloadFile("https://www.mlpack.org/datasets/ml-20m/movies.csv.gz",
+                      "movies.csv.gz")
+
+  // Extract dataset.
+  mlpack.UnZip("ratings-only.csv.gz", "ratings-only.csv")
+  ratings, _ := mlpack.Load("ratings-only.csv")
+
+  mlpack.UnZip("movies.csv.gz", "movies.csv")
+  table, _ := csv.NewTable(csv.FromFile("movies.csv"), csv.LoadHeaders())
+  movies, _ := table.ReadColumn("title")
+
+  // Split the dataset using mlpack.
+  params := mlpack.PreprocessSplitOptions()
+  params.TestRatio = 0.1
+  params.Verbose = true
+  ratings_test, _, ratings_train, _ := mlpack.PreprocessSplit(ratings, params)
+
+  // Train the model.  Change the rank to increase/decrease the complexity of the
+  // model.
+  cf_params := mlpack.CfOptions()
+  cf_params.Training = ratings_train
+  cf_params.Test = ratings_test
+  cf_params.Rank = 10
+  cf_params.Verbose = true
+  cf_params.Algorithm = "RegSVD"
+  _, cf_model := mlpack.Cf(cf_params)
+
+  // Now query the top 10 movies for user 1.
+  cf_params_2 := mlpack.CfOptions()
+  cf_params_2.InputModel = &cf_model
+  cf_params_2.Recommendations = 10
+  cf_params_2.Query = mat.NewDense(1, 1, []float64{1})
+  cf_params_2.Verbose = true
+  cf_params_2.MaxIterations = 10
+  output, _ := mlpack.Cf(cf_params_2)
+
+  // Get the names of the movies for user 1.
+  fmt.Println("Recommendations for user 1")
+  for i := 0; i < 10; i++ {
+    fmt.Println(i, ":", movies[int(output.At(0 , i))])
+  }
+}
+```
+
+Here is some example output, showing that user 1 seems to have good taste in
+movies:
+
+```
+Recommendations for user 1:
+  0: Casablanca (1942)
+  1: Pan's Labyrinth (Laberinto del fauno, El) (2006)
+  2: Godfather, The (1972)
+  3: Answer This! (2010)
+  4: Life Is Beautiful (La Vita è bella) (1997)
+  5: Adventures of Tintin, The (2011)
+  6: Dark Knight, The (2008)
+  7: Out for Justice (1991)
+  8: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
+  9: Schindler's List (1993)
+```
+
+## Next steps with mlpack
+
+Now that you have done some simple work with mlpack, you have seen how it can
+easily plug into a data science workflow in Go.  But the two examples above have
+only shown a little bit of the functionality of mlpack.  Many other methods
+are available.  A full list of these methods, with complete documentation, can
+be found on the following page:
+
+ - [mlpack Go binding documentation](https://www.mlpack.org/doc/stable/go_documentation.html)
+
+You can also use GoDoc to explore the `mlpack` module and its functions; every
+function comes with comprehensive documentation.
+
+Also, mlpack is much more flexible when used from C++ and allows much greater
+functionality.  So, more complicated tasks are possible if you are willing to
+write C++.  To get started learning about mlpack in C++, the
+[C++ quickstart](cpp.md) is a good resource to visit next.
diff -pruN 3.4.2-7/doc/quickstart/julia.md 4.0.1-1/doc/quickstart/julia.md
--- 3.4.2-7/doc/quickstart/julia.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/quickstart/julia.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,162 @@
+# mlpack in Julia quickstart guide
+
+This page describes how you can quickly get started using mlpack from Julia,
+gives a few examples of usage, and provides pointers to deeper documentation.
+
+This quickstart guide is also available for [C++](cpp.md), [Python](python.md),
+[the command line](cli.md), [R](R.md), and [Go](go.md).
+
+## Installing mlpack
+
+Installing the mlpack bindings for Julia is straightforward; you can just use
+`Pkg`:
+
+```julia
+using Pkg
+Pkg.add("mlpack")
+```
+
+Building the Julia bindings from scratch is a little more in-depth, though.  For
+information on that, follow the instructions in the
+[main README](../../README.md).
+
+## Simple quickstart example
+
+As a really simple example of how to use mlpack from Julia, let's do some
+simple classification on a subset of the standard machine learning `covertype`
+dataset.  We'll first split the dataset into a training set and a testing set,
+then we'll train an mlpack random forest on the training data, and finally we'll
+print the accuracy of the random forest on the test dataset.
+
+You can copy-paste this code directly into Julia to run it.  You may need to add
+some extra packages with, e.g., `using Pkg; Pkg.add("CSV");
+Pkg.add("DataFrames"); Pkg.add("Libz")`.
+
+```julia
+using CSV
+using DataFrames
+using Libz
+using mlpack
+
+# Load the dataset from an online URL.  Replace with 'covertype.csv.gz' if you
+# want to use the full dataset.
+df = CSV.read(ZlibInflateInputStream(open(download(
+        "http://www.mlpack.org/datasets/covertype-small.csv.gz"))))
+
+# Split the labels.
+labels = df[!, :label][:]
+dataset = select!(df, Not(:label))
+
+# Split the dataset using mlpack.
+test, test_labels, train, train_labels = mlpack.preprocess_split(
+    dataset,
+    input_labels=labels,
+    test_ratio=0.3)
+
+# Train a random forest.
+rf_model, _, _ = mlpack.random_forest(training=train,
+                              labels=train_labels,
+                              print_training_accuracy=true,
+                              num_trees=10,
+                              minimum_leaf_size=3)
+
+# Predict the labels of the test points.
+_, predictions, _ = mlpack.random_forest(input_model=rf_model,
+                                         test=test)
+
+# Now print the accuracy.  The third return value ('probabilities'), which we
+# ignored here, could also be used to generate an ROC curve.
+correct = sum(predictions .== test_labels)
+print("$(correct) out of $(length(test_labels)) test points correct " *
+    "($(correct / length(test_labels) * 100.0)%).\n")
+```
+
+We can see that we achieve reasonably good accuracy on the test dataset (80%+);
+if we use the full `covertype.csv.gz`, the accuracy should increase
+significantly (but training will take longer).
+
+It's easy to modify the code above to do more complex things, or to use
+different mlpack learners, or to interface with other machine learning toolkits.
+
+## Using mlpack for movie recommendations
+
+In this example, we'll train a collaborative filtering model using mlpack's
+[`cf()`](https://www.mlpack.org/doc/stable/julia_documentation.html#cf) method.
+We'll train this on the
+[MovieLens dataset](https://grouplens.org/datasets/movielens/), and then we'll
+use the model that we train to give recommendations.
+
+You can copy-paste this code directly into Julia to run it.
+
+```julia
+using CSV
+using mlpack
+using Libz
+using DataFrames
+
+# Load the MovieLens dataset from online URLs.  This is taken from
+# files.grouplens.org/ but reposted on mlpack.org as unpacked and slightly
+# preprocessed data.
+ratings = CSV.read(ZlibInflateInputStream(open(download(
+        "http://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz"))))
+movies = CSV.read(ZlibInflateInputStream(open(download(
+        "http://www.mlpack.org/datasets/ml-20m/movies.csv.gz"))))
+
+# Hold out 10% of the dataset into a test set so we can evaluate performance.
+ratings_test, _, ratings_train, _ = mlpack.preprocess_split(ratings;
+    test_ratio=0.1, verbose=true)
+
+# Train the model.  Change the rank to increase/decrease the complexity of the
+# model.
+_, cf_model = mlpack.cf(training=ratings_train,
+                        test=ratings_test,
+                        rank=10,
+                        verbose=true,
+                        algorithm="RegSVD")
+
+# Now query the top 10 movies for user 1.
+output, _ = mlpack.cf(input_model=cf_model,
+                      query=[1],
+                      recommendations=10,
+                      verbose=true,
+                      max_iterations=10)
+
+print("Recommendations for user 1:\n")
+for i in 1:10
+  print("  $(i): $(movies[output[i], :][3])\n")
+end
+```
+
+Here is some example output, showing that user 1 seems to have good taste in
+movies:
+
+```
+Recommendations for user 1:
+  1: Casablanca (1942)
+  2: Pan's Labyrinth (Laberinto del fauno, El) (2006)
+  3: Godfather, The (1972)
+  4: Answer This! (2010)
+  5: Life Is Beautiful (La Vita è bella) (1997)
+  6: Adventures of Tintin, The (2011)
+  7: Dark Knight, The (2008)
+  8: Out for Justice (1991)
+  9: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
+  10: Schindler's List (1993)
+```
+
+## Next steps with mlpack
+
+Now that you have done some simple work with mlpack, you have seen how it can
+easily plug into a data science workflow in Julia.  But the two examples above
+have only shown a little bit of the functionality of mlpack.  Many other
+functions are available.  A full list of these functions, with complete
+documentation, can be found on the following page:
+
+ - [Julia documentation](https://www.mlpack.org/doc/stable/julia_documentation.html)
+
+You can also use the Julia REPL to explore the `mlpack` module and its
+functions; every function comes with comprehensive documentation.
+
+Also, mlpack is much more flexible when used from C++ and allows much greater
+functionality.  So, more complicated tasks are possible if you are willing to
+write C++ (or perhaps CxxWrap.jl).  To get started learning about mlpack in C++,
+the [C++ quickstart](cpp.md) would be a good place to start.
diff -pruN 3.4.2-7/doc/quickstart/python.md 4.0.1-1/doc/quickstart/python.md
--- 3.4.2-7/doc/quickstart/python.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/quickstart/python.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,169 @@
+# mlpack in Python quickstart guide
+
+This page describes how you can quickly get started using mlpack from Python,
+gives a few examples of usage, and provides pointers to deeper documentation.
+
+This quickstart guide is also available for [C++](cpp.md),
+[the command line](cli.md), [Julia](julia.md), [R](R.md), and [Go](go.md).
+
+## Installing mlpack
+
+Installing the mlpack bindings for Python is straightforward.  You can use
+either `pip`:
+
+```sh
+pip install mlpack
+```
+
+or `conda`:
+
+```sh
+conda install -c conda-forge mlpack
+```
+
+You can also use the mlpack Docker image on Dockerhub, which has all of the
+Python bindings pre-installed:
+
+```sh
+docker run -it mlpack/mlpack /bin/bash
+```
+
+Otherwise, you can build the Python bindings from scratch using the
+documentation in the [main README](../../README.md).
+
+## Simple mlpack quickstart example
+
+As a really simple example of how to use mlpack from Python, let's do some
+simple classification on a subset of the standard machine learning `covertype`
+dataset.  We'll first split the dataset into a training set and a testing set,
+then we'll train an mlpack random forest on the training data, and finally we'll
+print the accuracy of the random forest on the test dataset.
+
+You can copy-paste this code directly into Python to run it.
+
+```py
+import mlpack
+import pandas as pd
+import numpy as np
+
+# Load the dataset from an online URL.  Replace with 'covertype.csv.gz' if you
+# want to use the full dataset.
+df = pd.read_csv('http://www.mlpack.org/datasets/covertype-small.csv.gz')
+
+# Split the labels.
+labels = df['label']
+dataset = df.drop('label', axis=1)
+
+# Split the dataset using mlpack.  The output comes back as a dictionary,
+# which we'll unpack for clarity of code.
+output = mlpack.preprocess_split(input=dataset,
+                                 input_labels=labels,
+                                 test_ratio=0.3)
+training_set = output['training']
+training_labels = output['training_labels']
+test_set = output['test']
+test_labels = output['test_labels']
+
+# Train a random forest.
+output = mlpack.random_forest(training=training_set,
+                              labels=training_labels,
+                              print_training_accuracy=True,
+                              num_trees=10,
+                              minimum_leaf_size=3)
+random_forest = output['output_model']
+
+# Predict the labels of the test points.
+output = mlpack.random_forest(input_model=random_forest,
+                              test=test_set)
+
+# Now print the accuracy.  The 'probabilities' output could also be used
+# to generate an ROC curve.
+correct = np.sum(
+    output['predictions'] == np.reshape(test_labels, (test_labels.shape[0],)))
+print(str(correct) + ' correct out of ' + str(len(test_labels)) + ' (' +
+    str(100 * float(correct) / float(len(test_labels))) + '%).')
+```
+
+We can see that we achieve reasonably good accuracy on the test dataset (80%+);
+if we use the full `covertype.csv.gz`, the accuracy should increase
+significantly (but training will take longer).
+
+It's easy to modify the code above to do more complex things, or to use
+different mlpack learners, or to interface with other machine learning toolkits.
+
+## Using mlpack for movie recommendations
+
+In this example, we'll train a collaborative filtering model using mlpack's
+[`cf()`](https://www.mlpack.org/doc/stable/python_documentation.html#cf) method.
+We'll train this on the
+[MovieLens dataset](https://grouplens.org/datasets/movielens/), and then we'll
+use the model that we train to give recommendations.
+
+You can copy-paste this code directly into Python to run it.
+
+```py
+import mlpack
+import pandas as pd
+import numpy as np
+
+# First, load the MovieLens dataset.  This is taken from files.grouplens.org/
+# but reposted on mlpack.org as unpacked and slightly preprocessed data.
+ratings = pd.read_csv('http://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz')
+movies = pd.read_csv('http://www.mlpack.org/datasets/ml-20m/movies.csv.gz')
+
+# Hold out 10% of the dataset into a test set so we can evaluate performance.
+output = mlpack.preprocess_split(input=ratings, test_ratio=0.1, verbose=True)
+ratings_train = output['training']
+ratings_test = output['test']
+
+# Train the model.  Change the rank to increase/decrease the complexity of the
+# model.
+output = mlpack.cf(training=ratings_train,
+                   test=ratings_test,
+                   rank=10,
+                   verbose=True,
+                   algorithm='RegSVD')
+cf_model = output['output_model']
+
+# Now query the top 10 movies for user 1.
+output = mlpack.cf(input_model=cf_model,
+                   query=[[1]],
+                   recommendations=10,
+                   verbose=True)
+
+# Get the names of the movies for user 1.
+print("Recommendations for user 1:")
+for i in range(10):
+  print("  " + str(i) + ": " + str(movies.loc[movies['movieId'] ==
+      output['output'][0, i]].iloc[0]['title']))
+```
+
+Here is some example output, showing that user 1 seems to have good taste in
+movies:
+
+```
+Recommendations for user 1:
+  0: Casablanca (1942)
+  1: Pan's Labyrinth (Laberinto del fauno, El) (2006)
+  2: Godfather, The (1972)
+  3: Answer This! (2010)
+  4: Life Is Beautiful (La Vita è bella) (1997)
+  5: Adventures of Tintin, The (2011)
+  6: Dark Knight, The (2008)
+  7: Out for Justice (1991)
+  8: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
+  9: Schindler's List (1993)
+```
+
+## Next steps with mlpack
+
+Now that you have done some simple work with mlpack, you have seen how it can
+easily plug into a data science workflow in Python.  But the two examples above
+have only shown a little bit of the functionality of mlpack.  Many other
+commands are available.  A full list of these commands, with complete
+documentation, can be found on the following page:
+
+ - [Python documentation](https://www.mlpack.org/doc/stable/python_documentation.html)
+
+Also, mlpack is much more flexible when used from C++ and allows much greater
+functionality.  So, more complicated tasks are possible if you are willing to
+write C++ (or perhaps Cython).  To get started learning about mlpack in C++, the
+[C++ quickstart](cpp.md) would be a good place to go.
diff -pruN 3.4.2-7/doc/quickstart/R.md 4.0.1-1/doc/quickstart/R.md
--- 3.4.2-7/doc/quickstart/R.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/quickstart/R.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,162 @@
+# mlpack in R quickstart guide
+
+This page describes how you can quickly get started using mlpack from R,
+gives a few examples of usage, and provides pointers to deeper documentation.
+
+This quickstart guide is also available for [C++](cpp.md), [Python](python.md),
+[Julia](julia.md), [the command line](cli.md), and [Go](go.md).
+
+## Installing mlpack
+
+Installing the mlpack bindings for R is straightforward; you can just use
+CRAN:
+
+```r
+install.packages('mlpack')
+```
+
+Building the R bindings from scratch is a little more in-depth, though.  For
+information on that, follow the instructions in the
+[main README](../../README.md).
+
+## Simple mlpack quickstart example
+
+As a really simple example of how to use mlpack from R, let's do some
+simple classification on a subset of the standard machine learning `covertype`
+dataset.  We'll first split the dataset into a training set and a testing set,
+then we'll train an mlpack random forest on the training data, and finally we'll
+print the accuracy of the random forest on the test dataset.
+
+You can copy-paste this code directly into R to run it.
+
+```r
+if(!requireNamespace("data.table", quietly = TRUE)) { install.packages("data.table") }
+suppressMessages({
+    library("mlpack")
+    library("data.table")
+})
+
+# Load the dataset from an online URL.  Replace with 'covertype.csv.gz' if you
+# want to use the full dataset.
+df <- fread("https://www.mlpack.org/datasets/covertype-small.csv.gz")
+
+# Split the labels.
+labels <- df[, .(label)]
+dataset <- df[, label:=NULL]
+
+# Split the dataset using mlpack.
+prepdata <- preprocess_split(input = dataset,
+                             input_labels = labels,
+                             test_ratio = 0.3,
+                             verbose = TRUE)
+
+# Train a random forest.
+output <- random_forest(training = prepdata$training,
+                        labels = prepdata$training_labels,
+                        print_training_accuracy = TRUE,
+                        num_trees = 10,
+                        minimum_leaf_size = 3,
+                        verbose = TRUE)
+rf_model <- output$output_model
+
+# Predict the labels of the test points.
+output <- random_forest(input_model = rf_model,
+                        test = prepdata$test,
+                        verbose = TRUE)
+
+# Now print the accuracy.  The third return value ('probabilities'), which we
+# ignored here, could also be used to generate an ROC curve.
+correct <- sum(output$predictions == prepdata$test_labels)
+cat(correct, "out of", length(prepdata$test_labels), "test points correct",
+    correct / length(prepdata$test_labels) * 100.0, "%\n")
+```
+
+We can see that we achieve reasonably good accuracy on the test dataset (80%+);
+if we use the full `covertype.csv.gz`, the accuracy should increase
+significantly (but training will take longer).
+
+It's easy to modify the code above to do more complex things, or to use
+different mlpack learners, or to interface with other machine learning toolkits.
+
+## Using mlpack for movie recommendations
+
+In this example, we'll train a collaborative filtering model using mlpack's
+[`cf()`](https://www.mlpack.org/doc/stable/r_documentation.html#cf) method.
+We'll train this on the
+[MovieLens dataset](https://grouplens.org/datasets/movielens/), and then we'll
+use the model that we train to give recommendations.
+
+You can copy-paste this code directly into R to run it.
+
+```r
+if(!requireNamespace("data.table", quietly = TRUE)) { install.packages("data.table") }
+suppressMessages({
+    library("mlpack")
+    library("data.table")
+})
+
+# First, load the MovieLens dataset.  This is taken from files.grouplens.org/
+# but reposted on mlpack.org as unpacked and slightly preprocessed data.
+ratings <- fread("http://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz")
+movies <- fread("http://www.mlpack.org/datasets/ml-20m/movies.csv.gz")
+
+# Hold out 10% of the dataset into a test set so we can evaluate performance.
+predata <- preprocess_split(input = ratings,
+                            test_ratio = 0.1,
+                            verbose = TRUE)
+
+# Train the model.  Change the rank to increase/decrease the complexity of the
+# model.
+output <- cf(training = predata$training,
+             test = predata$test,
+             rank = 10,
+             verbose = TRUE,
+             max_iterations = 2,
+             algorithm = "RegSVD")
+cf_model <- output$output_model
+
+# Now query the top 10 movies for user 1.
+output <- cf(input_model = cf_model,
+             query = matrix(1),
+             recommendations = 10,
+             verbose = TRUE)
+
+# Get the names of the movies for user 1.
+cat("Recommendations for user 1:\n")
+for (i in 1:10) {
+  cat("  ", i, ":", as.character(movies[output$output[i], 3]), "\n")
+}
+```
+
+Here is some example output, showing that user 1 seems to have good taste in
+movies:
+
+```
+Recommendations for user 1:
+  1: Casablanca (1942)
+  2: Pan's Labyrinth (Laberinto del fauno, El) (2006)
+  3: Godfather, The (1972)
+  4: Answer This! (2010)
+  5: Life Is Beautiful (La Vita è bella) (1997)
+  6: Adventures of Tintin, The (2011)
+  7: Dark Knight, The (2008)
+  8: Out for Justice (1991)
+  9: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
+  10: Schindler's List (1993)
+```
+
+## Next steps with mlpack
+
+After working through this overview of `mlpack`'s R package, we hope you are
+inspired to use `mlpack` in your data science workflow.  However, the two
+examples above have only shown a little bit of the functionality of mlpack.
+Many other functions are available.  A full list of these functions, with
+complete documentation, can be found on the following page:
+
+ - [R documentation](https://www.mlpack.org/doc/stable/r_documentation.html)
+
+Also, mlpack is much more flexible when used from C++ and allows much greater
+functionality.  So, more complicated tasks are possible if you are willing to
+write C++ (or perhaps Rcpp).  To get started learning about mlpack in C++, a
+good starting point is the [C++ quickstart guide](cpp.md).
diff -pruN 3.4.2-7/doc/tutorials/amf/amf.txt 4.0.1-1/doc/tutorials/amf/amf.txt
--- 3.4.2-7/doc/tutorials/amf/amf.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/amf/amf.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,210 +0,0 @@
-/*!
-
-@file amf.txt
-@author Sumedh Ghaisas
-@brief Tutorial for how to use the AMF class
-
-@page amftutorial Alternating Matrix Factorization tutorial
-
-@section intro_amftut Introduction
-
-Alternating Matrix Factorization
-
-Alternating matrix factorization decomposes matrx V in the form \f$ V \approx WH \f$
-where W is called the basis matrix and H is called the encoding matrix. V is
-taken to be of size n x m and the obtained W is n x r and H is r x m. The size
-r is called the rank of the factorization. Factorization is done by alternately
-calculating W and H respectively while holding the other matrix constant.
-
-\b mlpack provides:
-
- - a \ref amf_amftut "simple C++ interface" to perform Alternating Matrix Factorization
-
-@section toc_amftut Table of Contents
-
-A list of all the sections this tutorial contains.
-
- - \ref intro_amftut
- - \ref toc_amftut
- - \ref amf_amftut
-   - \ref t_policy_amftut
-   - \ref init_rule_amftut
-   - \ref update_rule_amftut
-   - \ref nmf_amftut
-   - \ref svd_amftut
- - \ref further_doc_amftut
-
-@section amf_amftut The 'AMF' class
-
-The AMF class is templatized with 3 parameters; the first contains the policy
-used to determine when the algorithm has converged; the second contains the
-initialization rule for the W and H matrix; the last contains the update rule
-to be used during each iteration. This templatization allows the user to try
-various update rules, initialization rules, and termination policies (including
-ones not supplied with mlpack) for factorization.
-
-The class provides the following method that performs factorization
-@code
-template<typename MatType> double Apply(const MatType& V,
-                                        const size_t r,
-                                        arma::mat& W,
-                                        arma::mat& H);
-@endcode
-
-@subsection t_policy_amftut Using different termination policies
-
-The AMF implementation comes with different termination policies to support many
-implemented algorithms. Every termination policy implements the following method
-which returns the status of convergence.
-@code
-bool IsConverged(arma::mat& W, arma::mat& H)
-@endcode
-
-Below is a list of all the termination policies that mlpack contains.
-
- - \ref mlpack::amf::SimpleResidueTermination
- - \ref mlpack::amf::SimpleToleranceTermination
- - \ref mlpack::amf::ValidationRMSETermination
-
-In \c SimpleResidueTermination, termination decision depends on two factors, value
-of residue and number of iteration. If the current value of residue drops below
-the threshold or the number of iterations goes beyond the threshold, positive
-termination signal is passed to AMF.
-
-In \c SimpleToleranceTermination, termination criterion is met when the increase
-in residue value drops below the given tolerance. To accommodate spikes, certain
-number of successive residue drops are accepted. Secondary termination criterion
-terminates algorithm when iteration count goes beyond the threshold.
-
-\c ValidationRMSETermination divides the data into 2 sets, training set and
-validation set. Entries of validation set are nullifed in the input matrix.
-Termination criterion is met when increase in validation set RMSe value drops
-below the given tolerance. To accommodate spikes certain number of successive
-validation RMSE drops are accepted. This upper imit on successive drops can be
-adjusted with \c reverseStepCount. A secondary termination criterion terminates
-the algorithm when the iteration count goes above the threshold. Though this
-termination policy is better measure of convergence than the above 2 termination
-policies, it may cause a decrease in performance since it is computationally
-expensive.
-
-On the other hand, \ref mlpack::amf::CompleteIncrementalTermination
-"CompleteIncrementalTermination" and \ref mlpack::amf::IncompleteIncrementalTermination
-"IncompleteIncrementalTermination" are just wrapper classes for other
-termination policies. These policies are used when AMF is applied with
-\ref mlpack::amf::SVDCompleteIncrementalLearning
-"SVDCompleteIncrementalLearning" and
-\ref mlpack::amf::SVDIncompleteIncrementalLearning
-"SVDIncompleteIncrementalLearning", respectively.
-
-@subsection init_rule_amftut Using different initialization policies
-
-mlpack currently has 2 initialization policies implemented for AMF:
-
- - \ref mlpack::amf::RandomInitialization "RandomInitialization"
- - \ref mlpack::amf::RandomAcolInitialization "RandomAcolInitialization"
-
-\c RandomInitialization initializes matrices W and H with random uniform
-distribution while \c RandomAcolInitialization initializes the W matrix by
-averaging p randomly chosen columns of V.  In the case of
-\c RandomAcolInitialization, p is a template parameter.
-
-To implement their own initialization policy, users need to define the following
-function in their class.
-
-@code
-template<typename MatType>
-inline static void Initialize(const MatType& V,
-                              const size_t r,
-                              arma::mat& W,
-                              arma::mat& H)
-@endcode
-
-@subsection update_rule_amftut Using different update rules
-
-mlpack implements the following update rules for the AMF class:
-
- - \ref mlpack::amf::NMFALSUpdate "AMFALSUpdate"
- - \ref mlpack::amf::NMFMultiplicativeDistanceUpdate "NMFMultiplicativeDistanceUpdate"
- - \ref mlpack::amf::NMFMultiplicativeDivergenceUpdate "NMFMultiplicativeDivergenceUpdate"
- - \ref mlpack::amf::SVDBatchLearning "SVDBatchLearning"
- - \ref mlpack::amf::SVDIncompleteIncrementalLearning "SVDIncompleteIncrementalLearning"
- - \ref mlpack::amf::SVDCompleteIncrementalLearning "SVDCompleteIncrementalLearning"
-
-Non-Negative Matrix factorization can be achieved with \c NMFALSUpdate,
-\c NMFMultiplicativeDivergenceUpdate or \c NMFMultiplicativeDivergenceUpdate.
-\c NMFALSUpdate implements a simple Alternating Least Squares optimization while
-the other rules implement algorithms given in the paper 'Algorithms for
-Non-negative Matrix Factorization'.
-
-The remaining update rules perform the singular value decomposition of the matrix V.
-This SVD factorization is optimized for use by mlpack's collaborative filtering
-code (\ref cftutorial). This use of SVD factorizers for collaborative filtering
-is described in the paper 'A Guide to Singular Value Decomposition for
-Collaborative Filtering' by Chih-Chao Ma. For further details about the
-algorithms refer to the respective class documentation.
-
-@subsection nmf_amftut Using Non-Negative Matrix Factorization with AMF
-
-The use of AMF for Non-Negative Matrix factorization is simple. The AMF module
-defines \ref mlpack::amf::NMFALSFactorizer "NMFALSFactorizer" which can be used
-directly without knowing the internal structure of AMF. For example:
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/methods/amf/amf.hpp>
-
-using namespace std;
-using namespace arma;
-using namespace mlpack::amf;
-
-int main()
-{
-  NMFALSFactorizer nmf;
-  mat W, H;
-  mat V = randu<mat>(100, 100);
-  double residue = nmf.Apply(V, W, H);
-}
-@endcode
-
-\c NMFALSFactorizer uses \c SimpleResidueTermination, which is most preferred
-with Non-Negative Matrix factorizers.  The initialization of W and H in
-\c NMFALSFactorizer is random. The \c Apply() function returns the residue
-obtained by comparing the constructed matrix W * H with the original matrix V.
-
-@subsection svd_amftut Using Singular Value Decomposition with AMF
-
-mlpack has the following SVD factorizers implemented for AMF:
-
- - \ref mlpack::amf::SVDBatchFactorizer "SVDBatchFactorizer"
- - \ref mlpack::amf::SVDIncompleteIncrementalFactorizer "SVDIncompleteIncrementalFactorizer"
- - \ref mlpack::amf::SVDCompleteIncrementalFactorizer "SVDCompleteIncrementalFactorizer"
-
-Each of these factorizers takes a template parameter \c MatType, which specifies
-the type of the matrix V (dense or sparse---these have types \c arma::mat and
-\c arma::sp_mat, respectively).  When the matrix to be factorized is relatively
-sparse, specifying \c MatType \c = \c arma::sp_mat can provide a runtime boost.
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/methods/amf/amf.hpp>
-
-using namespace std;
-using namespace arma;
-using namespace mlpack::amf;
-
-int main()
-{
-  sp_mat V = randu<sp_mat>(100,100);
-  mat W, H;
-
-  SVDBatchFactorizer<sp_mat> svd;
-  double residue = svd.Apply(V, W, H);
-}
-@endcode
-
-@section further_doc_amftut Further documentation
-
-For further documentation on the AMF class, consult the \ref mlpack::amf::AMF
-"complete API documentation".
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/amf.md 4.0.1-1/doc/tutorials/amf.md
--- 3.4.2-7/doc/tutorials/amf.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/amf.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,183 @@
+# Alternating Matrix Factorization tutorial
+
+Alternating matrix factorization decomposes a matrix `V` in the form `V ~ WH`
+where `W` is called the basis matrix and `H` is called the encoding matrix.  `V`
+is taken to be of size `n x m` and the obtained `W` is `n x r` and `H` is `r x
+m`. The size `r` is called the *rank* of the factorization. Factorization is
+done by alternately calculating `W` and `H`, holding the other matrix constant
+at each step.
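+
+For instance, with the NMF-style update rules described below, the
+factorization (approximately) minimizes a reconstruction error such as the
+squared Frobenius norm:
+
+```
+min_{W, H}  || V - W H ||_F^2     (V: n x m, W: n x r, H: r x m)
+```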
+
+mlpack provides a simple C++ interface to perform Alternating Matrix
+Factorization.
+
+## The `AMF` class
+
+The `AMF` class is templatized with 3 parameters; the first contains the policy
+used to determine when the algorithm has converged; the second contains the
+initialization rule for the `W` and `H` matrix; the last contains the update
+rule to be used during each iteration. This templatization allows the user to
+try various update rules, initialization rules, and termination policies
+(including ones not supplied with mlpack) for factorization.
+
+The class provides the following method that performs factorization:
+
+```c++
+template<typename MatType> double Apply(const MatType& V,
+                                        const size_t r,
+                                        arma::mat& W,
+                                        arma::mat& H);
+```
+
+## Using different termination policies
+
+The `AMF` implementation comes with different termination policies to support
+many implemented algorithms. Every termination policy implements the following
+method, which returns the status of convergence:
+
+```c++
+bool IsConverged(arma::mat& W, arma::mat& H)
+```
+
+Below is a list of all the termination policies that mlpack contains.
+
+ - `SimpleResidueTermination`
+ - `SimpleToleranceTermination`
+ - `ValidationRMSETermination`
+
+In `SimpleResidueTermination`, the termination decision depends on two factors:
+the value of the residue and the number of iterations. If the current value of
+the residue drops below the threshold, or the number of iterations goes beyond
+the threshold, a positive termination signal is passed to AMF.
+
+In `SimpleToleranceTermination`, the termination criterion is met when the
+improvement in the residue value drops below the given tolerance. To
+accommodate spikes, a certain number of successive residue drops are accepted.
+A secondary termination criterion terminates the algorithm when the iteration
+count goes beyond the threshold.
+
+`ValidationRMSETermination` divides the data into two sets, a training set and
+a validation set. Entries of the validation set are nullified in the input
+matrix.  The termination criterion is met when the improvement in the
+validation set RMSE value drops below the given tolerance. To accommodate
+spikes, a certain number of successive validation RMSE drops are accepted; this
+upper limit on successive drops can be adjusted with `reverseStepCount`. A
+secondary termination criterion terminates the algorithm when the iteration
+count goes above the threshold. Though this termination policy is a better
+measure of convergence than the two termination policies above, it may cause a
+decrease in performance since it is computationally expensive.
+
+On the other hand, `CompleteIncrementalTermination` and
+`IncompleteIncrementalTermination` are just wrapper classes for other
+termination policies. These policies are used when AMF is applied with
+`SVDCompleteIncrementalLearning` and `SVDIncompleteIncrementalLearning`,
+respectively.
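+
+As a minimal sketch of how a termination policy is supplied (the parameter
+values here are arbitrary, and assume `SimpleResidueTermination`'s constructor
+takes the minimum residue followed by the maximum number of iterations), a
+policy object can be passed to the `AMF` constructor:
+
+```c++
+#include <mlpack.hpp>
+
+using namespace arma;
+using namespace mlpack;
+
+int main()
+{
+  // Stop when the residue drops below 1e-5, or after 200 iterations.
+  SimpleResidueTermination termination(1e-5, 200);
+  AMF<SimpleResidueTermination> amf(termination);
+
+  mat V = randu<mat>(100, 100);
+  mat W, H;
+  double residue = amf.Apply(V, 10, W, H); // Rank-10 factorization.
+}
+```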
+
+## Using different initialization policies
+
+mlpack currently has 2 initialization policies implemented for AMF:
+
+ - `RandomInitialization`
+ - `RandomAcolInitialization`
+
+`RandomInitialization` initializes the matrices `W` and `H` from a uniform
+random distribution, while `RandomAcolInitialization` initializes the `W`
+matrix by averaging `p` randomly chosen columns of `V`.  In the case of
+`RandomAcolInitialization`, `p` is a template parameter.
+
+To implement their own initialization policy, users need to define the following
+function in their class.
+
+```c++
+template<typename MatType>
+inline static void Initialize(const MatType& V,
+                              const size_t r,
+                              arma::mat& W,
+                              arma::mat& H)
+```
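+
+For example, a (hypothetical) policy that initializes both matrices to a
+constant value, shown here just to illustrate the interface, might look like
+this:
+
+```c++
+class ConstantInitialization
+{
+ public:
+  // Set W (n x r) and H (r x m) to a small constant value.
+  template<typename MatType>
+  inline static void Initialize(const MatType& V,
+                                const size_t r,
+                                arma::mat& W,
+                                arma::mat& H)
+  {
+    W.set_size(V.n_rows, r);
+    H.set_size(r, V.n_cols);
+    W.fill(0.1);
+    H.fill(0.1);
+  }
+};
+```
+
+Such a class can then be given to `AMF` as its second template parameter.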
+
+## Using different update rules
+
+mlpack implements the following update rules for the AMF class:
+
+ - `NMFALSUpdate`
+ - `NMFMultiplicativeDistanceUpdate`
+ - `NMFMultiplicativeDivergenceUpdate`
+ - `SVDBatchLearning`
+ - `SVDIncompleteIncrementalLearning`
+ - `SVDCompleteIncrementalLearning`
+
+Non-Negative Matrix Factorization can be achieved with `NMFALSUpdate`,
+`NMFMultiplicativeDistanceUpdate`, or `NMFMultiplicativeDivergenceUpdate`.
+`NMFALSUpdate` implements a simple Alternating Least Squares optimization while
+the other rules implement algorithms given in the paper 'Algorithms for
+Non-negative Matrix Factorization'.
+
+The remaining update rules perform the singular value decomposition of the
+matrix `V`.  This SVD factorization is optimized for use by mlpack's
+collaborative filtering code (see the [collaborative filtering
+tutorial](cf.md)). This use of SVD factorizers for collaborative filtering is
+described in the paper 'A Guide to Singular Value Decomposition for
+Collaborative Filtering' by Chih-Chao Ma. For further details about the
+algorithms refer to the respective class documentation.
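+
+Since the update rule is the third template parameter of `AMF`, selecting one
+is a matter of composing the type.  A minimal sketch using
+`NMFMultiplicativeDivergenceUpdate`, with the termination and initialization
+policies spelled out explicitly:
+
+```c++
+#include <mlpack.hpp>
+
+using namespace arma;
+using namespace mlpack;
+
+int main()
+{
+  // Compose an AMF type that uses multiplicative divergence NMF updates.
+  AMF<SimpleResidueTermination,
+      RandomInitialization,
+      NMFMultiplicativeDivergenceUpdate> amf;
+
+  mat V = randu<mat>(100, 100);
+  mat W, H;
+  double residue = amf.Apply(V, 10, W, H); // Rank-10 factorization.
+}
+```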
+
+## Using Non-Negative Matrix Factorization with `AMF`
+
+The use of `AMF` for Non-Negative Matrix Factorization is simple. The AMF
+module defines `NMFALSFactorizer`, which can be used directly without knowing
+the internal structure of `AMF`. For example:
+
+```c++
+#include <mlpack.hpp>
+
+using namespace std;
+using namespace arma;
+using namespace mlpack;
+
+int main()
+{
+  NMFALSFactorizer nmf;
+  mat W, H;
+  mat V = randu<mat>(100, 100);
+  // Apply() takes the rank of the factorization as its second argument.
+  double residue = nmf.Apply(V, 10, W, H);
+}
+```
+
+`NMFALSFactorizer` uses `SimpleResidueTermination`, which is generally preferred
+with Non-Negative Matrix factorizers.  The initialization of `W` and `H` in
+`NMFALSFactorizer` is random. The `Apply()` function returns the residue
+obtained by comparing the constructed matrix `W * H` with the original matrix
+`V`.
+
+## Using Singular Value Decomposition with `AMF`
+
+mlpack has the following SVD factorizers implemented for AMF:
+
+ - `SVDBatchFactorizer`
+ - `SVDIncompleteIncrementalFactorizer`
+ - `SVDCompleteIncrementalFactorizer`
+
+Each of these factorizers takes a template parameter `MatType`, which specifies
+the type of the matrix `V` (dense or sparse---these have types `arma::mat` and
+`arma::sp_mat`, respectively).  When the matrix to be factorized is relatively
+sparse, specifying `MatType = arma::sp_mat` can provide a runtime boost.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace std;
+using namespace arma;
+using namespace mlpack;
+
+int main()
+{
+  // randu() does not produce sparse matrices; use sprandu() (here, with 10%
+  // nonzero density) to generate a random sparse matrix.
+  sp_mat V = sprandu<sp_mat>(100, 100, 0.1);
+  mat W, H;
+
+  SVDBatchFactorizer<sp_mat> svd;
+  // Apply() takes the rank of the factorization as its second argument.
+  double residue = svd.Apply(V, 10, W, H);
+}
+```
+
+## Further documentation
+
+For further documentation on the `AMF` class, consult the `AMF`
+source code comments.
diff -pruN 3.4.2-7/doc/tutorials/ann/ann.txt 4.0.1-1/doc/tutorials/ann/ann.txt
--- 3.4.2-7/doc/tutorials/ann/ann.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/ann/ann.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,720 +0,0 @@
-/*!
-@file ann.txt
-@author Marcus Edel (https://kurg.org)
-@brief Tutorial for how to use the neural network code in mlpack.
-
-@page anntutorial Neural Network tutorial
-
-@section intro_anntut Introduction
-
-There is vast literature on neural networks and their uses, as well as
-strategies for choosing initial points effectively, keeping the algorithm from
-converging in local minima, choosing the best model structure, choosing the best
-optimizers, and so forth. mlpack implements many of these building blocks,
-making it very easy to create different neural networks in a modular way.
-
-mlpack currently implements two easy-to-use forms of neural networks:
-\b Feed-Forward \b Networks (this includes convolutional neural networks) and
-\b Recurrent \b Neural \b Networks.
-
-@section toc_anntut Table of Contents
-
-This tutorial is split into the following sections:
-
- - \ref intro_anntut
- - \ref toc_anntut
- - \ref model_api_anntut
- - \ref layer_api_anntut
- - \ref model_setup_training_anntut
- - \ref model_saving_loading_anntut
- - \ref extracting_parameters_anntut
- - \ref further_anntut
-
-@section model_api_anntut Model API
-
-There are two main neural network classes that are meant to be used as container
-for neural network layers that \b mlpack implements; each class is suited to a
-different setting:
-
-- \c FFN: the Feed Forward Network model provides a means to plug layers
-   together in a feed-forward fully connected manner.  This is the 'standard'
-   type of deep learning model, and includes convolutional neural networks
-   (CNNs).
-
-- \c RNN: the Recurrent Neural Network model provides a means to consider
-   successive calls to forward as different time-steps in a sequence.  This is
-   often used for time sequence modeling tasks, such as predicting the next
-   character in a sequence.
-
-Below is some basic guidance on what should be used. Note that the question of
-"which algorithm should be used" is a very difficult question to answer, so the
-guidance below is just that---guidance---and may not be right for a particular
-problem.
-
- - \b Feed-forward \b Networks allow signals or inputs to travel one way only.
-   There is no feedback within the network; for instance, the output of any
-   layer does only affect the upcoming layer. That makes Feed-Forward Networks
-   straightforward and very effective. They are extensively used in pattern
-   recognition and are ideally suitable for modeling relationships between a
-   set of input and one or more output variables.
-
-
- - \b Recurrent \b Networks allow signals or inputs to travel in both directions by
-   introducing loops in the network. Computations derived from earlier inputs are
-   fed back into the network, which gives the recurrent network some kind of
-   memory. RNNs are currently being used for all kinds of sequential tasks; for
-   instance, time series prediction, sequence labeling, and
-   sequence classification.
-
-In order to facilitate consistent implementations, the \c FFN and \c RNN classes
-have a number of methods in common:
-
- - \c Train(): trains the initialized model on the given input data. Optionally
-   an optimizer object can be passed to control the optimization process.
-
- - \c Predict(): predicts the responses to a given set of predictors. Note the
-   responses will reflect the output of the specified output layer.
-
- - \c Add(): this method can be used to add a layer to the model.
-
-@note
-To be able to optimize the network, both classes implement the OptimizerFunction
-API. In short, the \c FNN and \c RNN class implement two methods: \c Evaluate()
-and \c Gradient().  This enables the optimization given some learner and some
-performance measure.
-
-Similar to the existing layer infrastructure, the \c FFN and \c RNN classes are
-very extensible, having the following template arguments; which can be modified
-to change the behavior of the network:
-
- - \c OutputLayerType: this type defines the output layer used to evaluate the
-   network; by default, \c NegativeLogLikelihood is used.
-
- - \c InitializationRuleType: this type defines the method by which initial
-   parameters are set; by default, \c RandomInitialization is used.
-
-@code
-template<
-  typename OutputLayerType = NegativeLogLikelihood<>,
-  typename InitializationRuleType = RandomInitialization
->
-class FNN;
-@endcode
-
-Internally, the \c FFN and \c RNN class keeps an instantiated \c OutputLayerType
-class (which can be given in the constructor). This is useful for using
-different loss functions like the Negative-Log-Likelihood function or the \c
-VRClassReward function, which takes an optional score parameter. Therefore, you
-can write a non-static OutputLayerType class and use it seamlessly in
-combination with the \c FNN and \c RNN class. The same applies to the \c
-InitializationRuleType template parameter.
-
-By choosing different components for each of these template classes in
-conjunction with the \c Add() method, a very arbitrary network object can be
-constructed.
-
-Below are several examples of how the \c FNN and \c RNN classes might be used.
-The first examples focus on the \c FNN class, and the last shows how the \c
-RNN class can be used.
-
-The simplest way to use the FNN<> class is to pass in a dataset with the
-corresponding labels, and receive the classification in return. Note that the
-dataset must be column-major – that is, one column corresponds to one point. See
-the \ref matrices "matrices guide" for more information.
-
-The code below builds a simple feed-forward network with the default options,
-then queries for the assignments for every point in the \c queries matrix.
-
-\dot
-digraph G {
-  fontname = "Hilda 10"
-  rankdir=LR
-  splines=line
-  nodesep=.08;
-  ranksep=1;
-  edge [color=black, arrowsize=.5];
-  node [fixedsize=true,label="",style=filled,color=none,fillcolor=gray,shape=circle]
-
-  subgraph cluster_0 {
-    color=none;
-    node [style=filled, color=white, penwidth=15,fillcolor=black shape=circle];
-    l10  l11  l12  l13  l14  l15  ;
-    label = Input;
-  }
-
-  subgraph cluster_1 {
-    color=none;
-    node [style=filled, color=white, penwidth=15,fillcolor=gray shape=circle];
-    l20  l21  l22  l23  l24  l25  l26  l27  ;
-    label = Linear;
-  }
-
-  subgraph cluster_2 {
-    color=none;
-    node [style=filled, color=white, penwidth=15,fillcolor=gray shape=circle];
-    l30  l31  l32  l33  l34  l35  l36  l37  ;
-    label = Linear;
-  }
-
-  subgraph cluster_3 {
-    color=none;
-    node [style=filled, color=white, penwidth=15,fillcolor=black shape=circle];
-    l40  l41  l42  ;
-    label = LogSoftMax;
-  }
-
-  l10 -> l20   l10 -> l21   l10 -> l22   l10 -> l23   l10 -> l24   l10 -> l25
-  l10 -> l26   l10 -> l27   l11 -> l20   l11 -> l21   l11 -> l22   l11 -> l23
-  l11 -> l24   l11 -> l25   l11 -> l26   l11 -> l27   l12 -> l20   l12 -> l21
-  l12 -> l22   l12 -> l23   l12 -> l24   l12 -> l25   l12 -> l26   l12 -> l27
-  l13 -> l20   l13 -> l21   l13 -> l22   l13 -> l23   l13 -> l24   l13 -> l25
-  l13 -> l26   l13 -> l27   l14 -> l20   l14 -> l21   l14 -> l22   l14 -> l23
-  l14 -> l24   l14 -> l25   l14 -> l26   l14 -> l27   l15 -> l20   l15 -> l21
-  l15 -> l22   l15 -> l23   l15 -> l24   l15 -> l25   l15 -> l26   l15 -> l27
-  l20 -> l30   l20 -> l31   l20 -> l32   l20 -> l33   l20 -> l34   l20 -> l35
-  l20 -> l36   l20 -> l37   l21 -> l30   l21 -> l31   l21 -> l32   l21 -> l33
-  l21 -> l34   l21 -> l35   l21 -> l36   l21 -> l37   l22 -> l30   l22 -> l31
-  l22 -> l32   l22 -> l33   l22 -> l34   l22 -> l35   l22 -> l36   l22 -> l37
-  l23 -> l30   l23 -> l31   l23 -> l32   l23 -> l33   l23 -> l34   l23 -> l35
-  l23 -> l36   l23 -> l37   l24 -> l30   l24 -> l31   l24 -> l32   l24 -> l33
-  l24 -> l34   l24 -> l35   l24 -> l36   l24 -> l37   l25 -> l30   l25 -> l31
-  l25 -> l32   l25 -> l33   l25 -> l34   l25 -> l35   l25 -> l36   l25 -> l37
-  l26 -> l30   l26 -> l31   l26 -> l32   l26 -> l33   l26 -> l34   l26 -> l35
-  l26 -> l36   l26 -> l37   l27 -> l30   l27 -> l31   l27 -> l32   l27 -> l33
-  l27 -> l34   l27 -> l35   l27 -> l36   l27 -> l37   l30 -> l40   l30 -> l41
-  l30 -> l42   l31 -> l40   l31 -> l41   l31 -> l42   l32 -> l40   l32 -> l41
-  l32 -> l42   l33 -> l40   l33 -> l41   l33 -> l42   l34 -> l40   l34 -> l41
-  l34 -> l42   l35 -> l40   l35 -> l41   l35 -> l42   l36 -> l40   l36 -> l41
-  l36 -> l42   l37 -> l40   l37 -> l41   l37 -> l42
-}
-\enddot
-@note
-The number of inputs in the above graph doesn't match with the real
-number of features in the thyroid dataset and are just used as an abstract
-representation.
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer.hpp>
-#include <mlpack/methods/ann/ffn.hpp>
-
-using namespace mlpack;
-using namespace mlpack::ann;
-
-int main()
-{
-  // Load the training set and testing set.
-  arma::mat trainData;
-  data::Load("thyroid_train.csv", trainData, true);
-  arma::mat testData;
-  data::Load("thyroid_test.csv", testData, true);
-
-  // Split the labels from the training set and testing set respectively.
-  arma::mat trainLabels = trainData.row(trainData.n_rows - 1);
-  arma::mat testLabels = testData.row(testData.n_rows - 1);
-  trainData.shed_row(trainData.n_rows - 1);
-  testData.shed_row(testData.n_rows - 1);
-
-  // Initialize the network.
-  FFN<> model;
-  model.Add<Linear<> >(trainData.n_rows, 8);
-  model.Add<SigmoidLayer<> >();
-  model.Add<Linear<> >(8, 3);
-  model.Add<LogSoftMax<> >();
-
-  // Train the model.
-  model.Train(trainData, trainLabels);
-
-  // Use the Predict method to get the predictions.
-  arma::mat predictionTemp;
-  model.Predict(testData, predictionTemp);
-
-  /*
-    Since the predictionsTemp is of dimensions (3 x number_of_data_points)
-    with continuous values, we first need to reduce it to a dimension of
-    (1 x number_of_data_points) with scalar values, to be able to compare with
-    testLabels.
-
-    The first step towards doing this is to create a matrix of zeros with the
-    desired dimensions (1 x number_of_data_points).
-
-    In predictionsTemp, the 3 dimensions for each data point correspond to the
-    probabilities of belonging to the three possible classes.
-  */
-  arma::mat prediction = arma::zeros<arma::mat>(1, predictionTemp.n_cols);
-
-  // Find index of max prediction for each data point and store in "prediction"
-  for (size_t i = 0; i < predictionTemp.n_cols; ++i)
-  {
-    // we add 1 to the max index, so that it matches the actual test labels.
-    prediction(i) = arma::as_scalar(arma::find(
-        arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1;
-  }
-
-  /*
-    Compute the error between predictions and testLabels,
-    now that we have the desired predictions.
-  */
-  size_t correct = arma::accu(prediction == testLabels);
-  double classificationError = 1 - double(correct) / testData.n_cols;
-
-  // Print out the classification error for the testing dataset.
-  std::cout << "Classification Error for the Test set: " << classificationError << std::endl;
-  return 0;
-}
-@endcode
-
-Now, the matrix prediction holds the classification of each point in the
-dataset. Subsequently, we find the classification error by comparing it
-with testLabels.
-
-In the next example, we create simple noisy sine sequences, which are trained
-later on, using the RNN class in the `RNNModel()` method.
-
-@code
-void GenerateNoisySines(arma::mat& data,
-                        arma::mat& labels,
-                        const size_t points,
-                        const size_t sequences,
-                        const double noise = 0.3)
-{
-  arma::colvec x =  arma::linspace<arma::Col<double>>(0,
-      points - 1, points) / points * 20.0;
-  arma::colvec y1 = arma::sin(x + arma::as_scalar(arma::randu(1)) * 3.0);
-  arma::colvec y2 = arma::sin(x / 2.0 + arma::as_scalar(arma::randu(1)) * 3.0);
-
-  data = arma::zeros(points, sequences * 2);
-  labels = arma::zeros(2, sequences * 2);
-
-  for (size_t seq = 0; seq < sequences; seq++)
-  {
-    data.col(seq) = arma::randu(points) * noise + y1 +
-        arma::as_scalar(arma::randu(1) - 0.5) * noise;
-    labels(0, seq) = 1;
-
-    data.col(sequences + seq) = arma::randu(points) * noise + y2 +
-        arma::as_scalar(arma::randu(1) - 0.5) * noise;
-    labels(1, sequences + seq) = 1;
-  }
-}
-
-void RNNModel()
-{
-  const size_t rho = 10;
-
-  // Generate 12 (2 * 6) noisy sines. A single sine contains rho
-  // points/features.
-  arma::mat input, labelsTemp;
-  GenerateNoisySines(input, labelsTemp, rho, 6);
-
-  arma::mat labels = arma::zeros<arma::mat>(rho, labelsTemp.n_cols);
-  for (size_t i = 0; i < labelsTemp.n_cols; ++i)
-  {
-    const int value = arma::as_scalar(arma::find(
-        arma::max(labelsTemp.col(i)) == labelsTemp.col(i), 1)) + 1;
-    labels.col(i).fill(value);
-  }
-
-  /**
-   * Construct a network with 1 input unit, 4 hidden units and 10 output
-   * units. The hidden layer is connected to itself. The network structure
-   * looks like:
-   *
-   *  Input         Hidden        Output
-   * Layer(1)      Layer(4)      Layer(10)
-   * +-----+       +-----+       +-----+
-   * |     |       |     |       |     |
-   * |     +------>|     +------>|     |
-   * |     |    ..>|     |       |     |
-   * +-----+    .  +--+--+       +-----+
-   *            .     .
-   *            .     .
-   *            .......
-   */
-  Add<> add(4);
-  Linear<> lookup(1, 4);
-  SigmoidLayer<> sigmoidLayer;
-  Linear<> linear(4, 4);
-  Recurrent<> recurrent(add, lookup, linear, sigmoidLayer, rho);
-
-  RNN<> model(rho);
-  model.Add<IdentityLayer<> >();
-  model.Add(recurrent);
-  model.Add<Linear<> >(4, 10);
-  model.Add<LogSoftMax<> >();
-
-  StandardSGD opt(0.1, 1, input.n_cols /* 1 epoch */, -100);
-  model.Train(input, labels, opt);
-}
-@endcode
-
-For further examples on the usage of the ann classes, see [mlpack
-models](https://github.com/mlpack/models).
-
-@section layer_api_anntut Layer API
-
-In order to facilitate consistent implementations, we have defined a LayerType
-API that describes all the methods that a \c layer may implement. mlpack offers
-a few variations of this API, each designed to cover some of the model
-characteristics mentioned in the previous section. Any \c layer requires the
-implementation of a \c Forward() method. The interface looks like:
-
-@code
-template<typename eT>
-void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output);
-@endcode
-
-The method should calculate the output of the layer given the input matrix and
-store the result in the given output matrix. Next, any \c layer must implement
-the Backward() method, which uses certain computations obtained during the
-forward pass and should calculate the function f(x) by propagating x backward
-through f:
-
-@code
-template<typename eT>
-void Backward(const arma::Mat<eT>& input,
-              const arma::Mat<eT>& gy,
-              arma::Mat<eT>& g);
-@endcode
-
-Finally, if the layer is differentiable, the layer must also implement
-a Gradient() method:
-
-@code
-template<typename eT>
-void Gradient(const arma::Mat<eT>& input,
-              const arma::Mat<eT>& error,
-              arma::Mat<eT>& gradient);
-@endcode
-
-The Gradient() function should calculate the gradient of the layer's weights,
-given the input activations \c input and the backpropagated errors \c error,
-and place the results into the gradient matrix object \c gradient that is
-passed as an argument.
-
-@note
-Note that each method is templated on the element type, so the input and
-output matrices may be arma::mat (dense Armadillo matrix) or arma::sp_mat
-(sparse Armadillo matrix). This allows support for both sparse-supporting and
-non-sparse-supporting \c layer types without explicitly passing the type.
-
-In addition, each layer must implement the Parameters(), InputParameter(),
-OutputParameter(), and Delta() methods; a differentiable layer should also
-provide access to the gradient by implementing the Gradient() member function.
-Note that each function is typically a single line that looks like:
-
-@code
-OutputDataType const& Parameters() const { return weights; }
-@endcode
-
-Below is an example that shows each function with some additional boilerplate
-code.
-
-@note
-Note this is not an actual layer but instead an example that exists to show and
-document all the functions that an mlpack layer must implement.  For a better
-overview of the various layers, see \ref mlpack::ann. Also be aware that the
-implementations of each of the methods in this example are entirely fake and do
-not work; this example exists for its API, not its implementation.
-
-Note that layers sometimes have different properties. These properties are
-known at compile-time through the mlpack::ann::LayerTraits class, and some
-properties may imply the existence (or non-existence) of certain functions.
-Refer to the LayerTraits class in @ref layer_traits.hpp for more documentation.
-
-The two template parameters below must be template parameters to the layer, in
-the order given below. More template parameters are fine, but they must come
-after the first two.
-
- - \c InputDataType: this defines the internally used input type, for example
-   to store the parameter matrix. Note that a layer could be built on a dense
-   matrix or a sparse matrix; any Armadillo-compatible matrix type should be
-   supported. When the layer is written, it should be assumed that
-   InputDataType has the same functionality as arma::mat.
-
- - \c OutputDataType: this defines the internally used output type, for example
-   to store the output parameter and delta objects. The same considerations
-   apply as for \c InputDataType: any Armadillo-compatible matrix type should
-   be supported, and it should be assumed that OutputDataType has the same
-   functionality as arma::mat.
-
-@code
-template<typename InputDataType = arma::mat,
-         typename OutputDataType = arma::mat>
-class ExampleLayer
-{
- public:
-  ExampleLayer(const size_t inSize, const size_t outSize) :
-      inputSize(inSize), outputSize(outSize)
-  {
-    /* Nothing to do here */
-  }
-};
-@endcode
-
-The constructor for \c ExampleLayer will build the layer given the input and
-output size. Note that, if the input or output size information isn't used
-internally, it's not necessary to provide a specific constructor. Also, one
-could accept any other information that is necessary for the layer's
-construction. One example could be:
-
-@code
-ExampleLayer(const double ratio = 0.5) : ratio(ratio) {/* Nothing to do here*/}
-@endcode
-
-When this constructor is finished, the entire layer will be built and is ready
-to be used. Next, as pointed out above, each layer has to follow the LayerType
-API, so we must implement some additional functions.
-
-@code
-template<typename InputType, typename OutputType>
-void Forward(const InputType& input, OutputType& output)
-{
-  output = arma::ones(input.n_rows, input.n_cols);
-}
-
-template<typename InputType, typename ErrorType, typename GradientType>
-void Backward(const InputType& input, const ErrorType& gy, GradientType& g)
-{
-  g = arma::zeros(gy.n_rows, gy.n_cols) + gy;
-}
-
-template<typename InputType, typename ErrorType, typename GradientType>
-void Gradient(const InputType& input,
-              ErrorType& error,
-              GradientType& gradient)
-{
-  gradient = arma::zeros(input.n_rows, input.n_cols) * error;
-}
-@endcode
-
-The three functions \c Forward(), \c Backward() and \c Gradient() (which is
-needed for a differentiable layer) contain the main logic of the layer. The
-following functions are just to access and manipulate the different layer
-parameters.
-
-@code
-OutputDataType& Parameters() { return weights; }
-InputDataType& InputParameter() { return inputParameter; }
-OutputDataType& OutputParameter() { return outputParameter; }
-OutputDataType& Delta() { return delta; }
-OutputDataType& Gradient() { return gradient; }
-@endcode
-
-Since some of these methods return internal class members, we have to define
-them.
-
-@code
-private:
-  size_t inputSize, outputSize;
-  OutputDataType weights, delta, gradient, outputParameter;
-  InputDataType inputParameter;
-@endcode
-
-Note some members are just here so \c ExampleLayer compiles without warning.
-For instance, \c inputSize is not required to be a member of every type of
-layer.
-
-There is one last method that is especially interesting for a layer that shares
-parameters. Since the layer weights are set once the complete model is defined,
-it's not possible to split the weights at construction time. To solve this
-issue, a layer can implement the \c Reset() method, which is called once the
-layer parameters are set.
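-
-As a rough sketch (hypothetical; the member names and sizes here are
-assumptions for illustration, not part of any real layer), a layer that splits
-its single parameter block into a weight matrix and a bias vector might
-implement \c Reset() like this:
-
-@code
-void Reset()
-{
-  // `weights` aliases memory owned by the enclosing network; create
-  // non-owning views into it for the weight matrix and the bias vector.
-  weight = arma::mat(weights.memptr(), outSize, inSize, false, false);
-  bias = arma::mat(weights.memptr() + weight.n_elem, outSize, 1, false, false);
-}
-@endcode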
-
-@section model_setup_training_anntut Model Setup & Training
-
-Once the base container is selected (\c FFN or \c RNN), the \c Add method can be
-used to add layers to the model.  The code below adds two linear layers to the
-model---the first takes 512 units as input and gives 256 output units, and
-the second takes 256 units as input and gives 128 output units.
-
-@code
-FFN<> model;
-model.Add<Linear<> >(512, 256);
-model.Add<Linear<> >(256, 128);
-@endcode
-
-The model is trained on Armadillo matrices. For training a model, you will
-typically use the \c Train() function:
-
-@code
-arma::mat trainingSet, trainingLabels;
-model.Train(trainingSet, trainingLabels);
-@endcode
-
-You can use mlpack's \c Load() function to load a dataset like this:
-
-@code
-arma::mat trainingSet;
-data::Load("dataset.csv", dataset, true);
-@endcode
-
-@code
-$ cat dataset.csv
-0, 1, 4
-1, 0, 5
-1, 1, 1
-2, 0, 2
-@endcode
-
-The type does not necessarily need to be a CSV; it can be any supported storage
-format, assuming that it is a coordinate-format file in the format specified
-above.  For more information on mlpack file formats, see the documentation for
-mlpack::data::Load().
-
-@note
-It’s often a good idea to normalize or standardize your data, for example using:
-
-@code
-for (size_t i = 0; i < dataset.n_cols; ++i)
-  dataset.col(i) /= norm(dataset.col(i), 2);
-@endcode
-
-Also, it is possible to retrain a model with new parameters or with
-a new reference set. This is functionally equivalent to creating a new model.
-
-@section model_saving_loading_anntut Saving & Loading
-
-Using \c boost::serialization (for more information about the internals see
-[Serialization - Boost C++ Libraries](www.boost.org/libs/serialization/doc/)),
-mlpack is able to load and save machine learning models with ease, including
-saving a trained neural network to disk. The example below builds a model on
-the \c thyroid dataset and then saves the model to the file \c model.xml for
-later use.
-
-@code
-// Load the training set.
-arma::mat dataset;
-data::Load("thyroid_train.csv", dataset, true);
-
-// Split the data from the training set.
-arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4,
-    dataset.n_cols - 1);
-
-// Split the labels from the training set.
-arma::mat trainLabelsTemp = dataset.submat(dataset.n_rows - 3, 0,
-    dataset.n_rows - 1, dataset.n_cols - 1);
-
-// Convert the one-hot label rows into a single row of class indices.
-arma::mat trainLabels = arma::zeros<arma::mat>(1, trainLabelsTemp.n_cols);
-for (size_t i = 0; i < trainLabelsTemp.n_cols; ++i)
-{
-  trainLabels(i) = arma::as_scalar(arma::find(
-      arma::max(trainLabelsTemp.col(i)) == trainLabelsTemp.col(i), 1)) + 1;
-}
-
-// Initialize the network.
-FFN<> model;
-model.Add<Linear<> >(trainData.n_rows, 3);
-model.Add<SigmoidLayer<> >();
-model.Add<LogSoftMax<> >();
-
-// Train the model.
-model.Train(trainData, trainLabels);
-
-// Use the Predict method to get the assignments.
-arma::mat assignments;
-model.Predict(trainData, assignments);
-
-data::Save("model.xml", "model", model, false);
-@endcode
-
-After this, the file model.xml will be available in the current working
-directory.
-
-Now, we can look at the output model file, \c model.xml:
-
-@code
-$ cat model.xml
-<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
-<!DOCTYPE boost_serialization>
-<boost_serialization signature="serialization::archive" version="15">
-<model class_id="0" tracking_level="0" version="0">
-  <parameter class_id="1" tracking_level="1" version="0" object_id="_0">
-    <n_rows>66</n_rows>
-    <n_cols>1</n_cols>
-    <n_elem>66</n_elem>
-    <vec_state>0</vec_state>
-    <item>-7.55971528334903642e+00</item>
-    <item>-9.95435955058058930e+00</item>
-    <item>9.31133928948225353e+00</item>
-    <item>-5.36784434861701953e+00</item>
-    ...
-  </parameter>
-  <width>0</width>
-  <height>0</height>
-  <currentInput object_id="_1">
-    <n_rows>0</n_rows>
-    <n_cols>0</n_cols>
-    <n_elem>0</n_elem>
-    <vec_state>0</vec_state>
-  </currentInput>
-  <network class_id="2" tracking_level="0" version="0">
-    <count>3</count>
-    <item_version>0</item_version>
-    <item class_id="3" tracking_level="0" version="0">
-      <which>18</which>
-      <value class_id="4" tracking_level="1" version="0" object_id="_2">
-        <inSize>21</inSize>
-        <outSize>3</outSize>
-      </value>
-    </item>
-    <item>
-      <which>2</which>
-      <value class_id="5" tracking_level="1" version="0" object_id="_3"></value>
-    </item>
-    <item>
-      <which>20</which>
-      <value class_id="6" tracking_level="1" version="0" object_id="_4"></value>
-    </item>
-  </network>
-</model>
-</boost_serialization>
-@endcode
-
-As you can see, the \c \<parameter\> section of \c model.xml contains the trained
-network weights, stored as a vector of 66 rows and 1 column. Note that in this
-example, we used three different layers, as can be seen by looking at the \c
-\<network\> section. Each node has a unique id that is used to reconstruct the
-model when loading.
-
-The models can also be saved as \c .bin or \c .txt; the \c .xml format provides
-a human-inspectable format (though the models tend to be quite complex and may
-be difficult to read). These models can then be reused for classification or
-other tasks.
-
-Instead of training a network from scratch, mlpack can also load a pre-trained
-model. For instance, the example below will load the model from \c model.xml and
-then generate the class predictions for the \c thyroid test dataset.
-
-@code
-data::Load("thyroid_test.csv", dataset, true);
-
-arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4,
-    dataset.n_cols - 1);
-
-data::Load("model.xml", "model", model);
-
-arma::mat predictions;
-model.Predict(testData, predictions);
-@endcode
-
-This makes it possible to distribute a model without having to train it first,
-or simply to save a model for later use. Note that loading will also work on
-different machines.
-
-@section extracting_parameters_anntut Extracting Parameters
-
-To access the weights from the neural network layers, you can call the following
-function on any initialized network:
-
-@code
-model.Parameters();
-@endcode
-
-which will return the complete model parameters as an Armadillo matrix object.
-However, it is often useful to have not only the parameters of the complete
-network, but also the parameters of a specific layer. Another method,
-\c Model(), makes this easily possible:
-
-@code
-model.Model()[1].Parameters();
-@endcode
-
-In the example above, we get the weights of the second layer.
-
-@section further_anntut Further documentation
-
-For further documentation on the ann classes, consult the \ref mlpack::ann
-"complete API documentation".
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/ann.md 4.0.1-1/doc/tutorials/ann.md
--- 3.4.2-7/doc/tutorials/ann.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/ann.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,681 @@
+# Neural Network tutorial
+
+There is vast literature on neural networks and their uses, as well as
+strategies for choosing initial points effectively, keeping the algorithm from
+converging in local minima, choosing the best model structure, choosing the best
+optimizers, and so forth. mlpack implements many of these building blocks,
+making it very easy to create different neural networks in a modular way.
+
+mlpack currently implements two easy-to-use forms of neural networks:
+*Feed-Forward Networks* (this includes convolutional neural networks) and
+*Recurrent Neural Networks*.
+
+## Model API
+
+There are two main neural network classes that are meant to be used as container
+for neural network layers that mlpack implements; each class is suited to a
+different setting:
+
+- `FFN`: the Feed Forward Network model provides a means to plug layers
+   together in a feed-forward fully connected manner.  This is the 'standard'
+   type of deep learning model, and includes convolutional neural networks
+   (CNNs).
+
+- `RNN`: the Recurrent Neural Network model provides a means to consider
+   successive calls to forward as different time-steps in a sequence.  This is
+   often used for time sequence modeling tasks, such as predicting the next
+   character in a sequence.
+
+Below is some basic guidance on what should be used. Note that the question of
+"which algorithm should be used" is a very difficult question to answer, so the
+guidance below is just that---guidance---and may not be right for a particular
+problem.
+
+ - *Feed-forward Networks* allow signals or inputs to travel one way only.
+   There is no feedback within the network; for instance, the output of any
+   layer does only affect the upcoming layer. That makes Feed-Forward Networks
+   straightforward and very effective. They are extensively used in pattern
+   recognition and are ideally suitable for modeling relationships between a
+   set of input and one or more output variables.
+
+
+ - *Recurrent Networks* allow signals or inputs to travel in both directions by
+   introducing loops in the network. Computations derived from earlier inputs
+   are fed back into the network, which gives the recurrent network some kind of
+   memory. RNNs are currently being used for all kinds of sequential tasks; for
+   instance, time series prediction, sequence labeling, and
+   sequence classification.
+
+In order to facilitate consistent implementations, the `FFN` and `RNN` classes
+have a number of methods in common:
+
+ - `Train()`: trains the initialized model on the given input data. Optionally
+   an optimizer object can be passed to control the optimization process.
+
+ - `Predict()`: predicts the responses to a given set of predictors. Note the
+   responses will reflect the output of the specified output layer.
+
+ - `Add()`: this method can be used to add a layer to the model.
+
+*Note*: to be able to optimize the network, both classes implement the
+[ensmallen](https://www.ensmallen.org) function API. In short, the `FFN` and
+`RNN` classes implement two methods: `Evaluate()` and `Gradient()`.  This
+enables the optimization of the network with any ensmallen optimizer.
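+
+For example, a sketch of training with an explicitly chosen ensmallen optimizer
+(here `ens::Adam`; the layer sizes and hyperparameters are illustrative
+assumptions, and `trainData`/`trainLabels` are assumed to be already loaded)
+might look like:
+
+```c++
+FFN<> model;
+model.Add<Linear>(16);
+model.Add<Sigmoid>();
+model.Add<Linear>(3);
+model.Add<LogSoftMax>();
+
+// Step size 0.001, batch size 32, default Adam moment parameters,
+// at most 100000 iterations, tolerance 1e-5.
+ens::Adam opt(0.001, 32, 0.9, 0.999, 1e-8, 100000, 1e-5);
+model.Train(trainData, trainLabels, opt);
+```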
+
+Similar to the existing layer infrastructure, the `FFN` and `RNN` classes are
+very extensible, having the following template arguments; which can be modified
+to change the behavior of the network:
+
+ - `OutputLayerType`: this type defines the output layer used to evaluate the
+   network; by default, `NegativeLogLikelihood` is used.
+
+ - `InitializationRuleType`: this type defines the method by which initial
+   parameters are set; by default, `RandomInitialization` is used.
+
+```c++
+template<
+  typename OutputLayerType = NegativeLogLikelihood,
+  typename InitializationRuleType = RandomInitialization
+>
+class FFN;
+```
+
+Internally, the `FFN` and `RNN` classes keep an instantiated `OutputLayerType`
+object (which can be given in the constructor). This is useful for using
+different loss functions like the negative log-likelihood function or the
+`VRClassReward` function, which takes an optional score parameter. Therefore,
+you can write an `OutputLayerType` class with its own internal state and use it
+seamlessly in combination with the `FFN` and `RNN` classes. The same applies to
+the `InitializationRuleType` template parameter.
+
+By choosing different components for each of these template classes in
+conjunction with the `Add()` method, a nearly arbitrary network object can be
+constructed, as the sketch below shows.
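+
+As an illustration, a network that uses a mean squared error output layer and
+constant weight initialization could be constructed like this (a sketch; the
+choice of components, the initialization value, and the layer size are
+arbitrary):
+
+```c++
+// Use MeanSquaredError as the output layer and initialize all weights to 0.1.
+FFN<MeanSquaredError, ConstInitialization> model(
+    MeanSquaredError(), ConstInitialization(0.1));
+model.Add<Linear>(10);
+```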
+
+Below are several examples of how the `FFN` and `RNN` classes might be used.
+The first examples focus on the `FFN` class, and the last shows how the
+`RNN` class can be used.
+
+The simplest way to use the `FFN` class is to pass in a dataset with the
+corresponding labels, and receive the classification in return. Note that the
+dataset must be column-major---that is, one column corresponds to one point.
+See the [matrices guide](../user/matrices.md) for more information.
+
+The code below builds a simple feed-forward network with the default options,
+then queries for the class predictions of every point in the test set.
+
+![Example feedforward network diagram](res/ann.svg)
+
+*Note*: the number of inputs in the above graph doesn't match the real number
+of features in the thyroid dataset; the graph is just an abstract
+representation.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+int main()
+{
+  // Load the training set and testing set.
+  arma::mat trainData;
+  data::Load("thyroid_train.csv", trainData, true);
+  arma::mat testData;
+  data::Load("thyroid_test.csv", testData, true);
+
+  // Split the labels from the training set and testing set respectively.
+  // Decrement the labels by 1, so they are in the range 0 to (numClasses - 1).
+  arma::mat trainLabels = trainData.row(trainData.n_rows - 1) - 1;
+  arma::mat testLabels = testData.row(testData.n_rows - 1) - 1;
+  trainData.shed_row(trainData.n_rows - 1);
+  testData.shed_row(testData.n_rows - 1);
+
+  // Initialize the network.
+  FFN<> model;
+  model.Add<Linear>(8);
+  model.Add<Sigmoid>();
+  model.Add<Linear>(3);
+  model.Add<LogSoftMax>();
+
+  // Train the model.
+  model.Train(trainData, trainLabels);
+
+  // Use the Predict method to get the predictions.
+  arma::mat predictionTemp;
+  model.Predict(testData, predictionTemp);
+
+  /*
+    Since predictionTemp is of dimensions (3 x number_of_data_points)
+    with continuous values, we first need to reduce it to a dimension of
+    (1 x number_of_data_points) with scalar values, to be able to compare with
+    testLabels.
+
+    The first step towards doing this is to create a matrix of zeros with the
+    desired dimensions (1 x number_of_data_points).
+
+    In predictionTemp, the 3 dimensions for each data point correspond to the
+    probabilities of belonging to the three possible classes.
+  */
+  arma::mat prediction = arma::zeros<arma::mat>(1, predictionTemp.n_cols);
+
+  // Find index of max prediction for each data point and store in "prediction"
+  for (size_t i = 0; i < predictionTemp.n_cols; ++i)
+  {
+    prediction(i) = arma::as_scalar(arma::find(
+        arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1));
+  }
+
+  /*
+    Compute the error between predictions and testLabels,
+    now that we have the desired predictions.
+  */
+  size_t correct = arma::accu(prediction == testLabels);
+  double classificationError = 1 - double(correct) / testData.n_cols;
+
+  // Print out the classification error for the testing dataset.
+  std::cout << "Classification Error for the Test set: " << classificationError << std::endl;
+  return 0;
+}
+```
+
+Now, the matrix `prediction` holds the classification of each point in the
+dataset. Subsequently, we find the classification error by comparing it
+with `testLabels`.
+
+In the next example, we create a simple noisy sine wave and use it to train a
+recurrent network with the `RNN` class. Note that the `RNN` class expects data
+as an `arma::cube`, where rows are dimensions, columns are separate sequences,
+and slices are time steps.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace ens;
+using namespace mlpack;
+
+/**
+ * Generates noisy sine wave and outputs the data and the labels that
+ * can be used directly for training and testing with RNN.
+ */
+void GenerateNoisySines(arma::cube& data,
+                        arma::cube& labels,
+                        size_t rho,
+                        const size_t dataPoints = 100,
+                        const double noisePercent = 0.2)
+{
+  size_t points = dataPoints;
+  size_t r = dataPoints % rho;
+
+  if (r == 0)
+    points += 1;
+  else
+    points += rho - r + 1;
+
+  arma::colvec x(points);
+  int i = 0;
+  double interval = 0.6 / points;
+
+  x.for_each([&i, noisePercent, interval]
+    (arma::colvec::elem_type& val) {
+    double t = interval * (++i);
+    val = ::sin(2 * M_PI * 10 * t) + (noisePercent * Random(0.0, 0.1));
+  });
+
+  arma::colvec y = arma::normalise(x);
+
+  // Now break this into columns of rho size slices.
+  size_t numColumns = y.n_elem / rho;
+  data = arma::cube(1, numColumns, rho);
+  labels = arma::cube(1, numColumns, 1);
+
+  for (size_t i = 0; i < numColumns; ++i)
+  {
+    data.tube(0, i) = y.rows(i * rho, i * rho + rho - 1);
+    labels.subcube(0, i, 0, 0, i, 0) =
+        y.rows(i * rho + rho, i * rho + rho);
+  }
+}
+
+int main()
+{
+  const size_t rho = 10;
+
+  // Generate a noisy sine wave, broken into subsequences of rho time steps
+  // each.
+  arma::cube input, labels;
+  GenerateNoisySines(input, labels, rho);
+
+  /**
+   * Construct a network with 1 input unit, 4 LSTM units and 1 output
+   * unit. The hidden layer is connected to itself. The network structure
+   * looks like:
+   *
+   *  Input         Hidden        Output
+   * Layer(1)      LSTM(4)       Layer(1)
+   * +-----+       +-----+       +-----+
+   * |     |       |     |       |     |
+   * |     +------>|     +------>|     |
+   * |     |    ..>|     |       |     |
+   * +-----+    .  +--+--+       +-----+
+   *            .     .
+   *            .     .
+   *            .......
+   *
+   * We use MeanSquaredError for the loss type, since we are predicting a
+   * continuous value.
+   */
+  RNN<MeanSquaredError> model(rho, true /* only one response per sequence */);
+  model.Add<LSTM>(4);
+  model.Add<LinearNoBias>(1);
+
+  StandardSGD opt(0.1, 1, 10 * input.n_cols /* 10 epochs */, -100);
+  model.Train(input, labels, opt);
+
+  // Now compute the MSE on the training set.
+  arma::cube predictions;
+  model.Predict(input, predictions);
+  const double mse = arma::accu(arma::square(
+      arma::vectorise(labels) -
+      arma::vectorise(predictions.slice(predictions.n_slices - 1)))) /
+      input.n_cols;
+  std::cout << "MSE on training set is " << mse << "." << std::endl;
+}
+```
+
+For further examples on the usage of the ann classes, see [mlpack
+models](https://github.com/mlpack/models).
+
+## Layer API
+
+In order to facilitate consistent implementations, we have defined a `LayerType`
+API that describes all the methods that a layer may implement. mlpack offers a
+few variations of this API, each designed to cover some of the model
+characteristics mentioned in the previous section. Any layer requires the
+implementation of a `Forward()` method. The interface looks like:
+
+```c++
+template<typename MatType>
+void Forward(const MatType& input, MatType& output);
+```
+
+*(Note that `MatType` can be a template parameter of the layer class itself, not
+necessarily the `Forward()` function.  This applies to the other functions of
+the API too.)*
+
+The method should calculate the output of the layer given the input matrix and
+store the result in the given output matrix. Next, any layer must implement the
+`Backward()` method, which uses certain computations obtained during the forward
+pass and should calculate the function `f(x)` by propagating `x` backward
+through `f`:
+
+```c++
+template<typename MatType>
+void Backward(const MatType& input,
+              const MatType& gy,
+              MatType& g);
+```
+
+Finally, if the layer is differentiable, the layer must also implement
+a `Gradient()` method:
+
+```c++
+template<typename MatType>
+void Gradient(const MatType& input,
+              const MatType& error,
+              MatType& gradient);
+```
+
+The `Gradient()` function should calculate the gradient with respect to the
+input activations `input` and calculated errors `error` and place the results
+into the gradient matrix object `gradient` that is passed as an argument.
+
+Each of these three methods accepts a template parameter `MatType`,
+which may be `arma::mat` (dense Armadillo matrix) or `arma::sp_mat`
+(sparse Armadillo matrix). This allows support for both sparse-supporting and
+non-sparse-supporting layers without explicitly passing the type.
+
+Every new layer should inherit from `Layer<MatType>`, which defines some core
+functionality.  There are three additional functions that must be implemented:
+
+ - `void ComputeOutputDimensions()`: this sets the internal member
+   `outputDimensions` to the correct output dimensions of the layer, given that
+   `inputDimensions` is set.
+
+ - `size_t WeightSize() const`: given that `ComputeOutputDimensions()` has been
+   called (and so `outputDimensions` and `inputDimensions` are correct), return
+   the number of trainable weights in the layer.
+
+ - `void SetWeights(typename MatType::elem_type*)`: this sets the layer's
+   internal parameter memory to the given pointer.
+
+Below is an example that shows each function with some additional boilerplate
+code.  Note this is not an actual layer but instead an example that exists to
+show and document all the functions that an mlpack layer must implement.  For
+a better overview of the various layers, see the layers in
+`src/mlpack/methods/ann/layer/`. Also be aware that the implementations of each
+of the methods in this example are entirely fake and do not work; this example
+exists for its API, not its implementation.
+
+```c++
+template<typename MatType = arma::mat>
+class ExampleLayer : public Layer<MatType>
+{
+ public:
+  // Note that the input dimensions will be set in the member
+  // `Layer<MatType>::inputDimensions` automatically before
+  // `ComputeOutputDimensions()` is called.
+  ExampleLayer()
+  {
+    /* Nothing to do here */
+  }
+
+ private:
+  MatType weights;
+};
+```
+
+The constructor for `ExampleLayer` builds an empty layer; its input and output
+dimensions are set later by the enclosing network. If no configuration is
+needed at construction time, the default constructor suffices. Also, one could
+accept any other information that is necessary for the layer's construction.
+One example could be:
+
+```c++
+template<typename MatType>
+ExampleLayer<MatType>(const double ratio = 0.5) : ratio(ratio)
+{ /* Nothing to do here */ }
+```
+
+When this constructor is finished, the entire layer will be built, but may not
+yet be ready to use.  We can assume that the enclosing `FFN` or `RNN` network
+will call `ComputeOutputDimensions()`, `WeightSize()`, and `SetWeights()` before
+any call to `Forward()` is done. Next, as pointed out above, each layer has to
+follow the `LayerType` API, so we must implement some additional functions.
+
+```c++
+template<typename MatType>
+void ExampleLayer<MatType>::Forward(const MatType& input, MatType& output)
+{
+  output = arma::ones(input.n_rows, input.n_cols) + weights;
+}
+
+template<typename MatType>
+void ExampleLayer<MatType>::Backward(const MatType& input,
+                                     const MatType& gy,
+                                     MatType& g)
+{
+  g = gy - weights;
+}
+
+template<typename MatType>
+void ExampleLayer<MatType>::Gradient(const InputType& input,
+                                     ErrorType& error,
+                                     GradientType& gradient)
+{
+  gradient = arma::ones(input.n_rows, input.n_cols);
+}
+```
+
+The three functions `Forward()`, `Backward()` and `Gradient()` (which is
+needed for a differentiable layer) contain the main logic of the layer.
+
+Now let's implement `ComputeOutputDimensions()`, `WeightSize()`, and
+`SetWeights()`.
+
+```c++
+template<typename MatType>
+void ExampleLayer<MatType>::ComputeOutputDimensions()
+{
+  // The output size is the same as the input size.
+  this->outputDimensions = this->inputDimensions;
+}
+
+template<typename MatType>
+size_t ExampleLayer<MatType>::WeightSize() const
+{
+  size_t numWeights = this->inputDimensions[0];
+  for (size_t i = 1; i < this->inputDimensions.size(); ++i)
+    numWeights *= this->inputDimensions[i];
+  return numWeights;
+}
+
+template<typename MatType>
+void ExampleLayer<MatType>::SetWeights(typename MatType::elem_type* weightsPtr)
+{
+  MakeAlias(weights, weightsPtr, WeightSize(), 1);
+}
+```
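+
+With these methods in place, the custom layer can be added to a network like
+any built-in layer (a sketch; remember that the implementations of
+`ExampleLayer` above are fake, so this model would not do anything useful):
+
+```c++
+FFN<> model;
+model.Add<Linear>(16);
+model.Add<ExampleLayer<>>();
+model.Add<LogSoftMax>();
+```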
+
+## Model Setup & Training
+
+Once the base container is selected (`FFN` or `RNN`), the `Add` method can be
+used to add layers to the model.  The code below adds two linear layers to the
+model---the first gives 256 output units, and the second gives 128 output
+units.  The input size of each layer is inferred from the data and from the
+preceding layer when the network is initialized.
+
+```c++
+FFN<> model;
+model.Add<Linear>(256);
+model.Add<Linear>(128);
+```
+
+The model is trained on Armadillo matrices. For training a model, you will
+typically use the `Train()` function:
+
+```c++
+arma::mat trainingSet, trainingLabels;
+model.Train(trainingSet, trainingLabels);
+```
+
+You can use mlpack's `Load()` function to load a dataset like this:
+
+```c++
+arma::mat trainingSet;
+data::Load("dataset.csv", dataset, true);
+```
+
+```sh
+$ cat dataset.csv
+0, 1, 4
+1, 0, 5
+1, 1, 1
+2, 0, 2
+```
+
+The file does not necessarily need to be a CSV; it can be any supported storage
+format, assuming that it is a coordinate-format file in the format specified
+above.  For more information on mlpack file formats, see the
+[tutorial](../user/formats.md).
+
+*Note*: it’s often a good idea to normalize or standardize your data, for
+example using:
+
+```c++
+for (size_t i = 0; i < dataset.n_cols; ++i)
+  dataset.col(i) /= norm(dataset.col(i), 2);
+```
+
+Also, it is possible to retrain a model with new parameters or with
+a new reference set. This is functionally equivalent to creating a new model.
+
+## Saving & Loading
+
+Using `cereal` (for more information about the internals see [the Cereal
+website](http://uscilab.github.io/cereal/)), mlpack is able to load and save
+machine learning models with ease.  Note that due to the large compilation
+overhead of enabling serialization, it is disabled by default.  To enable
+serialization for neural networks, define the `MLPACK_ENABLE_ANN_SERIALIZATION`
+macro before including mlpack:
+
+```c++
+#define MLPACK_ENABLE_ANN_SERIALIZATION
+#include <mlpack.hpp>
+```
+
+The example below builds a model on the `thyroid` dataset and then saves the
+model to the file `model.xml` for later use.
+
+```c++
+// Load the training set.
+arma::mat dataset;
+data::Load("thyroid_train.csv", dataset, true);
+
+// Split the data from the training set.
+arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4,
+    dataset.n_cols - 1);
+
+// Split the labels from the training set.
+// Subtract 1 so the labels are the range from 0 to (numClasses - 1).
+arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0,
+    dataset.n_rows - 1, dataset.n_cols - 1) - 1;
+
+// Initialize the network.
+FFN<> model;
+model.Add<Linear>(3);
+model.Add<Sigmoid>();
+model.Add<LogSoftMax>();
+
+// Train the model.
+model.Train(trainData, trainLabels);
+
+// Use the Predict method to get the assignments.
+arma::mat assignments;
+model.Predict(trainData, assignments);
+
+data::Save("model.xml", "model", model, false);
+```
+
+After this, the file `model.xml` will be available in the current working
+directory.
+
+Now, we can look at the output model file, `model.xml`:
+
+```sh
+$ cat model.xml
+<?xml version="1.0" encoding="utf-8"?>
+<cereal>
+	<model>
+		<cereal_class_version>0</cereal_class_version>
+		<parameter>
+			<n_rows>60</n_rows>
+			<n_cols>1</n_cols>
+			<vec_state>0</vec_state>
+			<elem>10.461979353567767</elem>
+			<elem>-10.040855482151116</elem>
+			<elem>0.18048901768535316</elem>
+			<elem>4.8989495084787169</elem>
+			<elem>-4.4381643782652276</elem>
+			<elem>0.049477846402230616</elem>
+			<elem>2.5271808924795987</elem>
+			<elem>-3.96993488526287</elem>
+			...
+		</parameter>
+		<width>0</width>
+		<height>0</height>
+		<reset>true</reset>
+		<value0>
+			<vecSize>3</vecSize>
+			<value0>
+				<which>30</which>
+				<value0>
+					<cereal_class_version>0</cereal_class_version>
+					<smartPointer>
+						<ptr_wrapper>
+							<valid>1</valid>
+							<data>
+								<cereal_class_version>0</cereal_class_version>
+								<inSize>19</inSize>
+								<outSize>3</outSize>
+							</data>
+						</ptr_wrapper>
+					</smartPointer>
+				</value0>
+			</value0>
+			<value1>
+				<which>6</which>
+				<value0>
+					<cereal_class_version>0</cereal_class_version>
+					<smartPointer>
+						<ptr_wrapper>
+							<valid>1</valid>
+							<data>
+								<cereal_class_version>0</cereal_class_version>
+							</data>
+						</ptr_wrapper>
+					</smartPointer>
+				</value0>
+			</value1>
+			<value2>
+				<which>32</which>
+				<value0>
+					<cereal_class_version>0</cereal_class_version>
+					<smartPointer>
+						<ptr_wrapper>
+							<valid>1</valid>
+							<data>
+								<cereal_class_version>0</cereal_class_version>
+							</data>
+						</ptr_wrapper>
+					</smartPointer>
+				</value0>
+			</value2>
+		</value0>
+	</model>
+</cereal>
+```
+
+As you can see, the `<parameter>` section of `model.xml` contains the trained
+network weights, stored as a vector with 60 rows and 1 column. Note that in
+this example, we used three different layers, as can be seen by looking at the
+serialized layer list (the `<vecSize>3</vecSize>` entry and the `<value0>`,
+`<value1>`, and `<value2>` nodes that follow it). The `<which>` id of each node
+is used to reconstruct the correct layer type when loading.
+
+The models can also be saved as `.bin` or `.txt`; the `.xml` format provides
+a human-inspectable format (though the models tend to be quite complex and may
+be difficult to read). These models can then be reused for classification or
+other tasks.
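+
+The extension of the filename passed to `data::Save()` determines the format;
+for example, to save a smaller binary version of the same model:
+
+```c++
+data::Save("model.bin", "model", model, false);
+```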
+
+Instead of training a network from scratch, mlpack can also load a pre-trained
+model. For instance, the example below will load the model from `model.xml` and
+then generate the class predictions for the `thyroid` test dataset.
+
+```c++
+data::Load("thyroid_test.csv", dataset, true);
+
+arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4,
+    dataset.n_cols - 1);
+
+data::Load("model.xml", "model", model);
+
+arma::mat predictions;
+model.Predict(testData, predictions);
+```
+
+This makes it possible to distribute a model without having to train it first,
+or simply to save a model for later use. Note that loading will also work on
+different machines.
+
+## Extracting Parameters
+
+To access the weights from the neural network layers, you can call the following
+function on any initialized network:
+
+```c++
+model.Parameters();
+```
+
+which will return the complete model parameters as an Armadillo matrix object.
+However, it is often useful to have not only the parameters of the complete
+network, but also the parameters of a specific layer.  The parameters for a
+specific layer `x` can be accessed via the `Parameters()` member:
+
+```c++
+arma::mat parametersX = model.Model()[x].Parameters();
+```
+
+In the example above, we get the weights of the `x`th layer.
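+
+Following the same accessor, a short sketch that prints the parameter count of
+every layer in an initialized network might look like:
+
+```c++
+for (size_t i = 0; i < model.Model().size(); ++i)
+{
+  std::cout << "Layer " << i << ": "
+      << model.Model()[i].Parameters().n_elem << " parameters." << std::endl;
+}
+```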
+
+## Further documentation
+
+For further documentation on the ann classes, consult the source code in the
+`src/mlpack/methods/ann/` directory.  Each of the layers are implemented in
+`src/mlpack/methods/ann/layer`.
diff -pruN 3.4.2-7/doc/tutorials/approx_kfn/approx_kfn.txt 4.0.1-1/doc/tutorials/approx_kfn/approx_kfn.txt
--- 3.4.2-7/doc/tutorials/approx_kfn/approx_kfn.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/approx_kfn/approx_kfn.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,1036 +0,0 @@
-/*!
-
-@file approx_kfn.txt
-@author Ryan Curtin
-@brief Tutorial for how to use approximate furthest neighbor search in mlpack.
-
-@page akfntutorial Approximate furthest neighbor search (mlpack_approx_kfn) tutorial
-
-@section intro_akfntut Introduction
-
-\b mlpack implements multiple strategies for approximate furthest neighbor
-search in its \c mlpack_approx_kfn and \c mlpack_kfn programs (each program
-corresponds to different techniques).  This tutorial discusses what problems
-these algorithms solve and how to use each of the techniques that \b mlpack
-implements.
-
-\b mlpack implements five approximate furthest neighbor search algorithms:
-
- - brute-force search (in \c mlpack_kfn)
- - single-tree search (in \c mlpack_kfn)
- - dual-tree search (in \c mlpack_kfn)
- - query-dependent approximate furthest neighbor (QDAFN) (in \c mlpack_approx_kfn)
- - DrusillaSelect (in \c mlpack_approx_kfn)
-
-These methods are described in the following papers:
-
-@code
-@inproceedings{curtin2013tree,
-  title={Tree-Independent Dual-Tree Algorithms},
-  author={Curtin, Ryan R. and March, William B. and Ram, Parikshit and Anderson,
-      David V. and Gray, Alexander G. and Isbell Jr., Charles L.},
-  booktitle={Proceedings of The 30th International Conference on Machine
-      Learning (ICML '13)},
-  pages={1435--1443},
-  year={2013}
-}
-@endcode
-
-@code
-@incollection{pagh2015approximate,
-  title={Approximate furthest neighbor in high dimensions},
-  author={Pagh, Rasmus and Silvestri, Francesco and Sivertsen, Johan and Skala,
-      Matthew},
-  booktitle={Similarity Search and Applications},
-  pages={3--14},
-  year={2015},
-  publisher={Springer}
-}
-@endcode
-
-@code
-@incollection{curtin2016fast,
-  title={Fast approximate furthest neighbors with data-dependent candidate
-      selection},
-  author={Curtin, Ryan R. and Gardner, Andrew B.},
-  booktitle={Similarity Search and Applications},
-  pages={221--235},
-  year={2016},
-  publisher={Springer}
-}
-@endcode
-
-@code
-@article{curtin2018exploiting,
-  title={Exploiting the structure of furthest neighbor search for fast
-      approximate results},
-  author={Curtin, Ryan R. and Echauz, Javier and Gardner, Andrew B.},
-  journal={Information Systems},
-  year={2018},
-  publisher={Elsevier}
-}
-@endcode
-
-The problem of furthest neighbor search is simple, and is the opposite of the
-much-more-studied nearest neighbor search problem.  Given a set of reference
-points \f$R\f$ (the set in which we are searching), and a set of query points
-\f$Q\f$ (the set of points for which we want the furthest neighbor), our goal is
-to return the \f$k\f$ furthest neighbors for each query point in \f$Q\f$:
-
-\f[
-\operatorname{k-argmax}_{p_r \in R} d(p_q, p_r).
-\f]
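-
-For intuition, a naive brute-force solution for k = 1 simply scans every
-reference point for each query point (an illustrative sketch with assumed
-column-major `queries` and `refs` matrices, not one of \b mlpack's interfaces):
-
-@code
-// For each query point, find the index of the furthest reference point.
-arma::Row<size_t> neighbors(queries.n_cols);
-for (size_t q = 0; q < queries.n_cols; ++q)
-{
-  double maxDist = -1.0;
-  for (size_t r = 0; r < refs.n_cols; ++r)
-  {
-    const double dist = arma::norm(queries.col(q) - refs.col(r), 2);
-    if (dist > maxDist) { maxDist = dist; neighbors[q] = r; }
-  }
-}
-@endcode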
-
-In order to solve this problem, \b mlpack provides a number of interfaces.
-
- - two \ref cli_akfntut "simple command-line executables" to calculate
-   approximate furthest neighbors
- - a simple \ref cpp_qdafn_akfntut "C++ class for QDAFN"
- - a simple \ref cpp_ds_akfntut "C++ class for DrusillaSelect"
- - a simple \ref cpp_ns_akfntut "C++ class for tree-based and brute-force"
-   search
-
-@section toc_akfntut Table of Contents
-
-A list of all the sections this tutorial contains.
-
- - \ref intro_akfntut
- - \ref toc_akfntut
- - \ref which_akfntut
- - \ref cli_akfntut
-   - \ref cli_ex1_akfntut
-   - \ref cli_ex2_akfntut
-   - \ref cli_ex3_akfntut
-   - \ref cli_ex4_akfntut
-   - \ref cli_ex5_akfntut
-   - \ref cli_ex6_akfntut
-   - \ref cli_ex7_akfntut
-   - \ref cli_ex8_akfntut
-   - \ref cli_final_akfntut
- - \ref cpp_ds_akfntut
-   - \ref cpp_ex1_ds_akfntut
-   - \ref cpp_ex2_ds_akfntut
-   - \ref cpp_ex3_ds_akfntut
-   - \ref cpp_ex4_ds_akfntut
-   - \ref cpp_ex5_ds_akfntut
- - \ref cpp_qdafn_akfntut
-   - \ref cpp_ex1_qdafn_akfntut
-   - \ref cpp_ex2_qdafn_akfntut
-   - \ref cpp_ex3_qdafn_akfntut
-   - \ref cpp_ex4_qdafn_akfntut
-   - \ref cpp_ex5_qdafn_akfntut
- - \ref cpp_ns_akfntut
-   - \ref cpp_ex1_ns_akfntut
-   - \ref cpp_ex2_ns_akfntut
-   - \ref cpp_ex3_ns_akfntut
-   - \ref cpp_ex4_ns_akfntut
- - \ref further_doc_akfntut
-
-@section which_akfntut Which algorithm should be used?
-
-There are three algorithms for furthest neighbor search that \b mlpack
-implements, and each is suited to a different setting.  Below is some basic
-guidance on what should be used.  Note that the question of "which algorithm
-should be used" is a very difficult question to answer, so the guidance below is
-just that---guidance---and may not be right for a particular problem.
-
- - \c DrusillaSelect is very fast and will perform extremely well for datasets
-   with outliers or datasets with structure (like low-dimensional datasets
-   embedded in high dimensions)
- - \c QDAFN is a random approach and therefore should be well-suited for
-   datasets with little to no structure
- - The tree-based approaches (the \c KFN class and the \c mlpack_kfn program)
-   are best suited for low-dimensional datasets, and are most effective when
-   very small levels of approximation are desired, or when exact results are
-   desired.
- - Dual-tree search is most useful when the query set is large and structured
-   (like for all-furthest-neighbor search).
- - Single-tree search is more useful when the query set is small.
-
-@section cli_akfntut Command-line 'mlpack_approx_kfn' and 'mlpack_kfn'
-
-\b mlpack provides two command-line programs to solve approximate furthest
-neighbor search:
-
- - \c mlpack_approx_kfn, for the QDAFN and DrusillaSelect approaches
- - \c mlpack_kfn, for exact and approximate tree-based approaches
-
-These two programs allow a large number of algorithms to be used to find
-approximate furthest neighbors.  Note that the \c mlpack_kfn program is also
-documented by the \ref cli_nstut section of the \ref nstutorial page, as it
-shares options with the \c mlpack_knn program.
-
-Below are several examples of how the \c mlpack_approx_kfn and \c mlpack_kfn
-programs might be used.  The first examples focus on the \c mlpack_approx_kfn
-program, and the last few show how \c mlpack_kfn can be used to produce
-approximate results.
-
-@subsection cli_ex1_akfntut Calculate 5 furthest neighbors with default options
-
-Here we have a query dataset \c queries.csv and a reference dataset \c refs.csv
-and we wish to find the 5 furthest neighbors of every query point in the
-reference dataset.  We may do that with the \c mlpack_approx_kfn algorithm,
-using the default of the \c DrusillaSelect algorithm with default parameters.
-
-@code
-$ mlpack_approx_kfn -q queries.csv -r refs.csv -v -k 5 -n n.csv -d d.csv
-[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Building DrusillaSelect model...
-[INFO ] Model built.
-[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Searching for 5 furthest neighbors with DrusillaSelect...
-[INFO ] Search complete.
-[INFO ] Saving CSV data to 'n.csv'.
-[INFO ] Saving CSV data to 'd.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   algorithm: ds
-[INFO ]   calculate_error: false
-[INFO ]   distances_file: d.csv
-[INFO ]   exact_distances_file: ""
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 5
-[INFO ]   neighbors_file: n.csv
-[INFO ]   num_projections: 5
-[INFO ]   num_tables: 5
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: queries.csv
-[INFO ]   reference_file: refs.csv
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   drusilla_select_construct: 0.000342s
-[INFO ]   drusilla_select_search: 0.000780s
-[INFO ]   loading_data: 0.010689s
-[INFO ]   saving_data: 0.005585s
-[INFO ]   total_time: 0.018592s
-@endcode
-
-Convenient timers for parts of the program operation are printed.  The results,
-saved in \c n.csv and \c d.csv, indicate the furthest neighbors and distances
-for each query point.  Each row of the output files corresponds to one query
-point.  The neighbors are listed from furthest to nearest; so, the
-4th element in the 3rd row of \c d.csv indicates the distance between the 3rd
-query point in \c queries.csv and its approximate 4th furthest neighbor.
-Similarly, the same element in \c n.csv indicates the index of the approximate
-4th furthest neighbor (with respect to \c refs.csv).
-
-@subsection cli_ex2_akfntut Specifying algorithm parameters for DrusillaSelect
-
-The \c -p (\c --num_projections) and \c -t (\c --num_tables) parameters affect
-the running of the \c DrusillaSelect algorithm and the QDAFN algorithm.
-Specifically, larger values for each of these parameters will search more
-possible candidate furthest neighbors and produce better results (at the cost of
-runtime).  More details on how each of these parameters works is available in
-the original papers, the \b mlpack source, or the documentation given by
-\c --help.
-
-In the example below, we run \c DrusillaSelect to find 4 furthest neighbors
-using 10 tables and 2 points in each table.  The candidate neighbor indices are
-written to \c n.csv and the corresponding distances to \c d.csv.
-
-@code
-$ mlpack_approx_kfn -q queries.csv -r refs.csv -v -k 4 -n n.csv -d d.csv -t 10 -p 2
-[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Building DrusillaSelect model...
-[INFO ] Model built.
-[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Searching for 4 furthest neighbors with DrusillaSelect...
-[INFO ] Search complete.
-[INFO ] Saving CSV data to 'n.csv'.
-[INFO ] Saving CSV data to 'd.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   algorithm: ds
-[INFO ]   calculate_error: false
-[INFO ]   distances_file: d.csv
-[INFO ]   exact_distances_file: ""
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 4
-[INFO ]   neighbors_file: n.csv
-[INFO ]   num_projections: 2
-[INFO ]   num_tables: 10
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: queries.csv
-[INFO ]   reference_file: refs.csv
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   drusilla_select_construct: 0.000645s
-[INFO ]   drusilla_select_search: 0.000551s
-[INFO ]   loading_data: 0.008518s
-[INFO ]   saving_data: 0.003734s
-[INFO ]   total_time: 0.014019s
-@endcode
-
-@subsection cli_ex3_akfntut Using QDAFN instead of DrusillaSelect
-
-The algorithm to be used for approximate furthest neighbor search can be
-specified with the \c --algorithm (\c -a) option to the \c mlpack_approx_kfn
-program.  Below, we use the QDAFN algorithm instead of the default.  We leave
-the \c -p and \c -t options at their defaults---even though QDAFN often requires
-more tables and points to get the same quality of results.
-
-@code
-$ mlpack_approx_kfn -q queries.csv -r refs.csv -v -k 3 -n n.csv -d d.csv -a qdafn
-[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Building QDAFN model...
-[INFO ] Model built.
-[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Searching for 3 furthest neighbors with QDAFN...
-[INFO ] Search complete.
-[INFO ] Saving CSV data to 'n.csv'.
-[INFO ] Saving CSV data to 'd.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   algorithm: qdafn
-[INFO ]   calculate_error: false
-[INFO ]   distances_file: d.csv
-[INFO ]   exact_distances_file: ""
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 3
-[INFO ]   neighbors_file: n.csv
-[INFO ]   num_projections: 5
-[INFO ]   num_tables: 5
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: queries.csv
-[INFO ]   reference_file: refs.csv
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   loading_data: 0.008380s
-[INFO ]   qdafn_construct: 0.003399s
-[INFO ]   qdafn_search: 0.000886s
-[INFO ]   saving_data: 0.002253s
-[INFO ]   total_time: 0.015465s
-@endcode
-
-@subsection cli_ex4_akfntut Printing results quality with exact distances
-
-The \c mlpack_approx_kfn program can calculate the quality of the results if the
-\c --calculate_error (\c -e) flag is specified.  Below we use the program with
-its default parameters and calculate the error, which is displayed in the
-output.  The error is only calculated for the furthest neighbor, not all k;
-therefore, in this example we have set \c -k to \c 1.
-
-@code
-$ mlpack_approx_kfn -q queries.csv -r refs.csv -v -k 1 -e -n n.csv
-[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Building DrusillaSelect model...
-[INFO ] Model built.
-[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Searching for 1 furthest neighbors with DrusillaSelect...
-[INFO ] Search complete.
-[INFO ] Calculating exact distances...
-[INFO ] 28891 node combinations were scored.
-[INFO ] 37735 base cases were calculated.
-[INFO ] Calculation complete.
-[INFO ] Average error: 1.08417.
-[INFO ] Maximum error: 1.28712.
-[INFO ] Minimum error: 1.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   algorithm: ds
-[INFO ]   calculate_error: true
-[INFO ]   distances_file: ""
-[INFO ]   exact_distances_file: ""
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 1
-[INFO ]   neighbors_file: ""
-[INFO ]   num_projections: 5
-[INFO ]   num_tables: 5
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: queries.csv
-[INFO ]   reference_file: refs.csv
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   computing_neighbors: 0.001476s
-[INFO ]   drusilla_select_construct: 0.000309s
-[INFO ]   drusilla_select_search: 0.000495s
-[INFO ]   loading_data: 0.008462s
-[INFO ]   total_time: 0.011670s
-[INFO ]   tree_building: 0.000202s
-@endcode
-
-Note that the output includes three lines indicating the error:
-
-@code
-[INFO ] Average error: 1.08417.
-[INFO ] Maximum error: 1.28712.
-[INFO ] Minimum error: 1.
-@endcode
-
-The error is the ratio between the true furthest distance and the distance of
-the returned candidate, so a minimum error of 1 indicates an exact result; over
-the entire query set, the algorithm has returned furthest neighbor candidates
-with a maximum error of 1.28712.
-
-@subsection cli_ex5_akfntut Using cached exact distances for quality results
-
-However, for large datasets, calculating the error may take a long time, because
-the exact furthest neighbors must be calculated.  Therefore, if the exact
-furthest neighbor distances are already known, they may be passed in with the
-\c --exact_distances_file (\c -x) option in order to avoid the calculation.  In
-the example below, we assume \c exact.csv contains the exact furthest neighbor
-distances.  We run the \c qdafn algorithm in this example.
-
-Note that the \c -e option must be specified for the \c -x option to have any
-effect.
-
-@code
-$ mlpack_approx_kfn -q queries.csv -r refs.csv -k 1 -e -x exact.csv -n n.csv -v -a qdafn
-[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Building QDAFN model...
-[INFO ] Model built.
-[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Searching for 1 furthest neighbors with QDAFN...
-[INFO ] Search complete.
-[INFO ] Loading 'exact.csv' as raw ASCII formatted data.  Size is 1 x 1000.
-[INFO ] Average error: 1.06914.
-[INFO ] Maximum error: 1.67407.
-[INFO ] Minimum error: 1.
-[INFO ] Saving CSV data to 'n.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   algorithm: qdafn
-[INFO ]   calculate_error: true
-[INFO ]   distances_file: ""
-[INFO ]   exact_distances_file: exact.csv
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 1
-[INFO ]   neighbors_file: n.csv
-[INFO ]   num_projections: 5
-[INFO ]   num_tables: 5
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: queries.csv
-[INFO ]   reference_file: refs.csv
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   loading_data: 0.010348s
-[INFO ]   qdafn_construct: 0.000318s
-[INFO ]   qdafn_search: 0.000793s
-[INFO ]   saving_data: 0.000259s
-[INFO ]   total_time: 0.012254s
-@endcode
-
-@subsection cli_ex6_akfntut Using tree-based approximation with mlpack_kfn
-
-The \c mlpack_kfn program allows specifying a desired approximation level with
-the \c --epsilon (\c -e) option.  The parameter must be greater than or equal
-to 0 and less than 1.  A setting of 0 indicates exact search.
-
-The example below runs dual-tree furthest neighbor search (the default
-algorithm) with the approximation parameter set to 0.5.
-
-@code
-$ mlpack_kfn -q queries.csv -r refs.csv -v -k 3 -e 0.5 -n n.csv -d d.csv
-[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Loaded reference data from 'refs.csv' (3x1000).
-[INFO ] Building reference tree...
-[INFO ] Tree built.
-[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Loaded query data from 'queries.csv' (3x1000).
-[INFO ] Searching for 3 neighbors with dual-tree kd-tree search...
-[INFO ] 1611 node combinations were scored.
-[INFO ] 13938 base cases were calculated.
-[INFO ] 1611 node combinations were scored.
-[INFO ] 13938 base cases were calculated.
-[INFO ] Search complete.
-[INFO ] Saving CSV data to 'n.csv'.
-[INFO ] Saving CSV data to 'd.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   algorithm: dual_tree
-[INFO ]   distances_file: d.csv
-[INFO ]   epsilon: 0.5
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 3
-[INFO ]   leaf_size: 20
-[INFO ]   naive: false
-[INFO ]   neighbors_file: n.csv
-[INFO ]   output_model_file: ""
-[INFO ]   percentage: 1
-[INFO ]   query_file: queries.csv
-[INFO ]   random_basis: false
-[INFO ]   reference_file: refs.csv
-[INFO ]   seed: 0
-[INFO ]   single_mode: false
-[INFO ]   tree_type: kd
-[INFO ]   true_distances_file: ""
-[INFO ]   true_neighbors_file: ""
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   computing_neighbors: 0.000442s
-[INFO ]   loading_data: 0.008060s
-[INFO ]   saving_data: 0.002850s
-[INFO ]   total_time: 0.012667s
-[INFO ]   tree_building: 0.000251s
-@endcode
-
-Note that the format of the output files \c d.csv and \c n.csv are the same as
-for \c mlpack_approx_kfn.
-
-@subsection cli_ex7_akfntut Different algorithms with 'mlpack_kfn'
-
-The \c mlpack_kfn program offers a large number of different algorithms that can
-be used.  The \c --algorithm (\c -a) option may be used to specify four
-different algorithm types: \c naive (brute-force search), \c single_tree
-(single-tree search), \c dual_tree (dual-tree search, the default), and
-\c greedy ("defeatist" greedy search, which goes to one leaf node of the tree
-then terminates).  The example below uses single-tree search to find approximate
-neighbors with epsilon set to 0.1.
-
-@code
-$ mlpack_kfn -q queries.csv -r refs.csv -v -k 3 -e 0.1 -n n.csv -d d.csv -a single_tree
-[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Loaded reference data from 'refs.csv' (3x1000).
-[INFO ] Building reference tree...
-[INFO ] Tree built.
-[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Loaded query data from 'queries.csv' (3x1000).
-[INFO ] Searching for 3 neighbors with single-tree kd-tree search...
-[INFO ] 13240 node combinations were scored.
-[INFO ] 15924 base cases were calculated.
-[INFO ] Search complete.
-[INFO ] Saving CSV data to 'n.csv'.
-[INFO ] Saving CSV data to 'd.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   algorithm: single_tree
-[INFO ]   distances_file: d.csv
-[INFO ]   epsilon: 0.1
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 3
-[INFO ]   leaf_size: 20
-[INFO ]   naive: false
-[INFO ]   neighbors_file: n.csv
-[INFO ]   output_model_file: ""
-[INFO ]   percentage: 1
-[INFO ]   query_file: queries.csv
-[INFO ]   random_basis: false
-[INFO ]   reference_file: refs.csv
-[INFO ]   seed: 0
-[INFO ]   single_mode: false
-[INFO ]   tree_type: kd
-[INFO ]   true_distances_file: ""
-[INFO ]   true_neighbors_file: ""
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   computing_neighbors: 0.000850s
-[INFO ]   loading_data: 0.007858s
-[INFO ]   saving_data: 0.003445s
-[INFO ]   total_time: 0.013084s
-[INFO ]   tree_building: 0.000250s
-@endcode
-
-@subsection cli_ex8_akfntut Saving a model for later use
-
-The \c mlpack_approx_kfn and \c mlpack_kfn programs both allow models to be
-saved and loaded for future use.  The \c --output_model_file (\c -M) option
-allows specifying where to save a model, and the \c --input_model_file (\c -m)
-option allows a model to be loaded instead of trained.  So, if you specify
-\c --input_model_file then you do not need to specify \c --reference_file
-(\c -r), \c --num_projections (\c -p), or \c --num_tables (\c -t).
-
-The example below saves a model with 10 projections and 5 tables.  Note that
-neither \c --query_file (\c -q) nor \c -k are specified; this run only builds
-the model and saves it to \c model.bin.
-
-@code
-$ mlpack_approx_kfn -r refs.csv -t 5 -p 10 -v -M model.bin
-[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Building DrusillaSelect model...
-[INFO ] Model built.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   algorithm: ds
-[INFO ]   calculate_error: false
-[INFO ]   distances_file: ""
-[INFO ]   exact_distances_file: ""
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 0
-[INFO ]   neighbors_file: ""
-[INFO ]   num_projections: 10
-[INFO ]   num_tables: 5
-[INFO ]   output_model_file: model.bin
-[INFO ]   query_file: ""
-[INFO ]   reference_file: refs.csv
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   drusilla_select_construct: 0.000321s
-[INFO ]   loading_data: 0.004700s
-[INFO ]   total_time: 0.007320s
-@endcode
-
-Now, with the model saved, we can run approximate furthest neighbor search on a
-query set using the saved model:
-
-@code
-$ mlpack_approx_kfn -m model.bin -q queries.csv -k 3 -d d.csv -n n.csv -v
-[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Searching for 3 furthest neighbors with DrusillaSelect...
-[INFO ] Search complete.
-[INFO ] Saving CSV data to 'n.csv'.
-[INFO ] Saving CSV data to 'd.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   algorithm: ds
-[INFO ]   calculate_error: false
-[INFO ]   distances_file: d.csv
-[INFO ]   exact_distances_file: ""
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: model.bin
-[INFO ]   k: 3
-[INFO ]   neighbors_file: n.csv
-[INFO ]   num_projections: 5
-[INFO ]   num_tables: 5
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: queries.csv
-[INFO ]   reference_file: ""
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   drusilla_select_search: 0.000878s
-[INFO ]   loading_data: 0.004599s
-[INFO ]   saving_data: 0.003006s
-[INFO ]   total_time: 0.009234s
-@endcode
-
-These options work in the same way for both the \c mlpack_approx_kfn and
-\c mlpack_kfn programs.
-
-@subsection cli_final_akfntut Final command-line program notes
-
-Both the \c mlpack_kfn and \c mlpack_approx_kfn programs contain numerous
-options not fully documented in these short examples.  You can run each program
-with the \c --help (\c -h) option for more information.
-
-@section cpp_ds_akfntut DrusillaSelect C++ class
-
-\b mlpack provides a simple \c DrusillaSelect C++ class that can be used inside
-of C++ programs to perform approximate furthest neighbor search.  The class has
-only one template parameter---\c MatType---which specifies the type of matrix to
-be used.  That means the class can be used with either dense data (of type
-\c arma::mat) or sparse data (of type \c arma::sp_mat).
-
-The following examples show simple usage of this class.
-
-@subsection cpp_ex1_ds_akfntut Approximate furthest neighbors with defaults
-
-The code below builds a \c DrusillaSelect model with default options on the
-matrix \c dataset, then queries for the approximate furthest neighbor of every
-point in the \c queries matrix.
-
-@code
-#include <mlpack/methods/approx_kfn/drusilla_select.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::mat dataset;
-// The query set.
-extern arma::mat queries;
-
-// Construct the model with defaults.
-DrusillaSelect<> ds(dataset);
-
-// Query the model, putting output into the following two matrices.
-arma::mat distances;
-arma::Mat<size_t> neighbors;
-ds.Search(queries, 1, neighbors, distances);
-@endcode
-
-At the end of this code, both the \c distances and \c neighbors matrices will
-have as many columns as the \c queries matrix.  So, each column of the
-\c distances and \c neighbors matrices holds the distances or neighbors of the
-corresponding column in the \c queries matrix.
-
-@subsection cpp_ex2_ds_akfntut Custom numbers of tables and projections
-
-The following example constructs a DrusillaSelect model with 10 tables and 5
-projections.  Once that is done it performs the same task as the previous
-example.
-
-@code
-#include <mlpack/methods/approx_kfn/drusilla_select.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::mat dataset;
-// The query set.
-extern arma::mat queries;
-
-// Construct the model with custom parameters.
-DrusillaSelect<> ds(dataset, 10, 5);
-
-// Query the model, putting output into the following two matrices.
-arma::mat distances;
-arma::Mat<size_t> neighbors;
-ds.Search(queries, 1, neighbors, distances);
-@endcode
-
-@subsection cpp_ex3_ds_akfntut Accessing the candidate set
-
-The \c DrusillaSelect algorithm merely scans the reference set and extracts a
-number of points that will be queried in a brute-force fashion when the
-\c Search() method is called.  We can access this set with the \c CandidateSet()
-method.  The code below prints the fifth point of the candidate set.
-
-@code
-#include <mlpack/methods/approx_kfn/drusilla_select.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::mat dataset;
-
-// Construct the model with custom parameters.
-DrusillaSelect<> ds(dataset, 10, 5);
-
-// Print the fifth point of the candidate set.
-std::cout << ds.CandidateSet().col(4).t();
-@endcode
-
-@subsection cpp_ex4_ds_akfntut Retraining on a new reference set
-
-It is possible to retrain a \c DrusillaSelect model with new parameters or with
-a new reference set.  This is functionally equivalent to creating a new model.
-The example code below creates a first \c DrusillaSelect model using 3 tables
-and 10 projections, and then retrains this with the same reference set using 10
-tables and 3 projections.
-
-@code
-#include <mlpack/methods/approx_kfn/drusilla_select.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::mat dataset;
-
-// Construct the model with initial parameters.
-DrusillaSelect<> ds(dataset, 3, 10);
-
-// Now retrain with different parameters.
-ds.Train(dataset, 10, 3);
-@endcode
-
-@subsection cpp_ex5_ds_akfntut Running on sparse data
-
-We can set the template parameter for \c DrusillaSelect to \c arma::sp_mat in
-order to perform furthest neighbor search on sparse data.  This code below
-creates a \c DrusillaSelect model using 4 tables and 6 projections with sparse
-input data, then searches for 3 approximate furthest neighbors.
-
-@code
-#include <mlpack/methods/approx_kfn/drusilla_select.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::sp_mat dataset;
-// The query dataset.
-extern arma::sp_mat querySet;
-
-// Construct the model on sparse data.
-DrusillaSelect<arma::sp_mat> ds(dataset, 4, 6);
-
-// Search on query data.
-arma::Mat<size_t> neighbors;
-arma::mat distances;
-ds.Search(querySet, 3, neighbors, distances);
-@endcode
-
-@section cpp_qdafn_akfntut QDAFN C++ class
-
-\b mlpack also provides a standalone simple \c QDAFN class for furthest neighbor
-search.  The API for this class is virtually identical to the \c DrusillaSelect
-class, and also has one template parameter to specify the type of matrix to be
-used (dense or sparse or other).
-
-The following subsections demonstrate usage of the \c QDAFN class in the same
-way as the previous section's examples for \c DrusillaSelect.
-
-@subsection cpp_ex1_qdafn_akfntut Approximate furthest neighbors with defaults
-
-The code below builds a \c QDAFN model with default options on the
-matrix \c dataset, then queries for the approximate furthest neighbor of every
-point in the \c queries matrix.
-
-@code
-#include <mlpack/methods/approx_kfn/qdafn.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::mat dataset;
-// The query set.
-extern arma::mat queries;
-
-// Construct the model with defaults.
-QDAFN<> qd(dataset);
-
-// Query the model, putting output into the following two matrices.
-arma::mat distances;
-arma::Mat<size_t> neighbors;
-qd.Search(queries, 1, neighbors, distances);
-@endcode
-
-At the end of this code, both the \c distances and \c neighbors matrices will
-have as many columns as the \c queries matrix.  So, each column of the
-\c distances and \c neighbors matrices holds the distances or neighbors of the
-corresponding column in the \c queries matrix.
-
-@subsection cpp_ex2_qdafn_akfntut Custom numbers of tables and projections
-
-The following example constructs a QDAFN model with 15 tables and 30
-projections.  Once that is done it performs the same task as the previous
-example.
-
-@code
-#include <mlpack/methods/approx_kfn/qdafn.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::mat dataset;
-// The query set.
-extern arma::mat queries;
-
-// Construct the model with custom parameters.
-QDAFN<> qdafn(dataset, 15, 30);
-
-// Query the model, putting output into the following two matrices.
-arma::mat distances;
-arma::Mat<size_t> neighbors;
-qdafn.Search(queries, 1, neighbors, distances);
-@endcode
-
-@subsection cpp_ex3_qdafn_akfntut Accessing the candidate set
-
-The \c QDAFN algorithm scans the reference set, extracting points that have been
-projected onto random directions.  Each random direction corresponds to a single
-table.  The \c QDAFN class stores these points as a vector of matrices, which
-can be accessed with the \c CandidateSet() method.  The code below prints the
-fifth point of the candidate set of the third table.
-
-@code
-#include <mlpack/methods/approx_kfn/qdafn.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::mat dataset;
-
-// Construct the model with custom parameters.
-QDAFN<> qdafn(dataset, 10, 5);
-
-// Print the fifth point of the candidate set of the third table.
-std::cout << qdafn.CandidateSet(2).col(4).t();
-@endcode
-
-@subsection cpp_ex4_qdafn_akfntut Retraining on a new reference set
-
-It is possible to retrain a \c QDAFN model with new parameters or with
-a new reference set.  This is functionally equivalent to creating a new model.
-The example code below creates a first \c QDAFN model using 3 tables
-and 10 projections, and then retrains this with the same reference set using 10
-tables and 3 projections.
-
-@code
-#include <mlpack/methods/approx_kfn/qdafn.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::mat dataset;
-
-// Construct the model with initial parameters.
-QDAFN<> qdafn(dataset, 3, 10);
-
-// Now retrain with different parameters.
-qdafn.Train(dataset, 10, 3);
-@endcode
-
-@subsection cpp_ex5_qdafn_akfntut Running on sparse data
-
-We can set the template parameter for \c QDAFN to \c arma::sp_mat in
-order to perform furthest neighbor search on sparse data.  This code below
-creates a \c QDAFN model using 20 tables and 60 projections with sparse
-input data, then searches for 3 approximate furthest neighbors.
-
-@code
-#include <mlpack/methods/approx_kfn/qdafn.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::sp_mat dataset;
-// The query dataset.
-extern arma::sp_mat querySet;
-
-// Construct the model on sparse data.
-QDAFN<arma::sp_mat> qdafn(dataset, 20, 60);
-
-// Search on query data.
-arma::Mat<size_t> neighbors;
-arma::mat distances;
-qdafn.Search(querySet, 3, neighbors, distances);
-@endcode
-
-@section cpp_ns_akfntut KFN C++ class
-
-The extensive \c NeighborSearch class also provides a way to search for
-approximate furthest neighbors using a different, tree-based technique.  For
-full documentation on this class, see the
-\ref nstutorial "NeighborSearch tutorial".  The \c KFN class is a convenient
-typedef of the \c NeighborSearch class that can be used to perform the furthest
-neighbors task with kd-trees.
-
-In the following subsections, the \c KFN class is used in short code examples.
-
-@subsection cpp_ex1_ns_akfntut Simple furthest neighbors example
-
-The \c KFN class has construction semantics similar to \c DrusillaSelect and
-\c QDAFN.  The example below constructs a \c KFN object (which will build the
-tree on the reference set), but note that the third parameter to the constructor
-allows us to specify our desired level of approximation.  In this example we
-choose epsilon = 0.05.  Then, the code searches for 3 approximate furthest
-neighbors.
-
-@code
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference dataset.
-extern arma::mat dataset;
-// The query set.
-extern arma::mat querySet;
-
-// Construct the object, performing the default dual-tree search with
-// approximation level epsilon = 0.05.
-KFN kfn(dataset, DUAL_TREE_MODE, 0.05);
-
-// Search for approximate furthest neighbors.
-arma::Mat<size_t> neighbors;
-arma::mat distances;
-kfn.Search(querySet, 3, neighbors, distances);
-@endcode
-
-@subsection cpp_ex2_ns_akfntut Retraining on a new reference set
-
-Like the \c QDAFN and \c DrusillaSelect classes, the \c KFN class is capable of
-retraining on a new reference set.  The code below demonstrates this.
-
-@code
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace mlpack::neighbor;
-
-// The original reference set we train on.
-extern arma::mat dataset;
-// The new reference set we retrain on.
-extern arma::mat newDataset;
-
-// Construct the object with approximation level 0.1.
-KFN kfn(dataset, DUAL_TREE_MODE, 0.1);
-
-// Retrain on the new reference set.
-kfn.Train(newDataset);
-@endcode
-
-@subsection cpp_ex3_ns_akfntut Searching in single-tree mode
-
-The particular mode to be used in search can be specified in the constructor.
-In this example, we use single-tree search (as opposed to the default of
-dual-tree search).
-
-@code
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference set.
-extern arma::mat dataset;
-// The query set.
-extern arma::mat querySet;
-
-// Construct the object with approximation level 0.25 and in single tree search
-// mode.
-KFN kfn(dataset, SINGLE_TREE_MODE, 0.25);
-
-// Search for 5 approximate furthest neighbors.
-arma::Mat<size_t> neighbors;
-arma::mat distances;
-kfn.Search(querySet, 5, neighbors, distances);
-@endcode
-
-@subsection cpp_ex4_ns_akfntut Searching in brute-force mode
-
-If desired, brute-force search ("naive search") can be used to find the furthest
-neighbors; however, the result will not be approximate---it will be exact (since
-every possibility will be considered).  The code below performs exact furthest
-neighbor search by using the \c KFN class in brute-force mode.
-
-@code
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace mlpack::neighbor;
-
-// The reference set.
-extern arma::mat dataset;
-// The query set.
-extern arma::mat querySet;
-
-// Construct the object in brute-force mode.  We can leave the approximation
-// parameter at its default (0) since brute-force will provide exact results.
-KFN kfn(dataset, NAIVE_MODE);
-
-// Perform the search for 2 furthest neighbors.
-arma::Mat<size_t> neighbors;
-arma::mat distances;
-kfn.Search(querySet, 2, neighbors, distances);
-@endcode
-
-@section further_doc_akfntut Further documentation
-
-For further documentation on the approximate furthest neighbor facilities
-offered by \b mlpack, consult the following documentation:
-
- - \ref nstutorial
- - \ref mlpack::neighbor::QDAFN "QDAFN class documentation"
- - \ref mlpack::neighbor::DrusillaSelect "DrusillaSelect class documentation"
- - \ref mlpack::neighbor::NeighborSearch "NeighborSearch class documentation"
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/approx_kfn.md 4.0.1-1/doc/tutorials/approx_kfn.md
--- 3.4.2-7/doc/tutorials/approx_kfn.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/approx_kfn.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,987 @@
+# Approximate furthest neighbor search tutorial
+
+mlpack implements multiple strategies for approximate furthest neighbor
+search in its `mlpack_approx_kfn` and `mlpack_kfn` command-line programs (each
+program corresponds to a different set of techniques).  This tutorial discusses what
+problems these algorithms solve and how to use each of the techniques that
+mlpack implements.
+
+Note that these functions are available as bindings to other languages too, and
+all the examples here can be adapted accordingly.
+
+mlpack implements five approximate furthest neighbor search algorithms:
+
+ - brute-force search (in `mlpack_kfn`)
+ - single-tree search (in `mlpack_kfn`)
+ - dual-tree search (in `mlpack_kfn`)
+ - query-dependent approximate furthest neighbor (QDAFN) (in `mlpack_approx_kfn`)
+ - DrusillaSelect (in `mlpack_approx_kfn`)
+
+These methods are described in the following papers:
+
+```
+@inproceedings{curtin2013tree,
+  title={Tree-Independent Dual-Tree Algorithms},
+  author={Curtin, Ryan R. and March, William B. and Ram, Parikshit and Anderson,
+      David V. and Gray, Alexander G. and Isbell Jr., Charles L.},
+  booktitle={Proceedings of The 30th International Conference on Machine
+      Learning (ICML '13)},
+  pages={1435--1443},
+  year={2013}
+}
+```
+
+```
+@incollection{pagh2015approximate,
+  title={Approximate furthest neighbor in high dimensions},
+  author={Pagh, Rasmus and Silvestri, Francesco and Sivertsen, Johan and Skala,
+      Matthew},
+  booktitle={Similarity Search and Applications},
+  pages={3--14},
+  year={2015},
+  publisher={Springer}
+}
+```
+
+```
+@incollection{curtin2016fast,
+  title={Fast approximate furthest neighbors with data-dependent candidate
+      selection},
+  author={Curtin, Ryan R. and Gardner, Andrew B.},
+  booktitle={Similarity Search and Applications},
+  pages={221--235},
+  year={2016},
+  publisher={Springer}
+}
+```
+
+```
+@article{curtin2018exploiting,
+  title={Exploiting the structure of furthest neighbor search for fast
+      approximate results},
+  author={Curtin, Ryan R. and Echauz, Javier and Gardner, Andrew B.},
+  journal={Information Systems},
+  year={2018},
+  publisher={Elsevier}
+}
+```
+
+The problem of furthest neighbor search is simple, and is the opposite of the
+much-more-studied nearest neighbor search problem.  Given a set of reference
+points `R` (the set in which we are searching), and a set of query points `Q`
+(the set of points for which we want the furthest neighbor), our goal is to
+return the `k` furthest neighbors for each query point in `Q`:
+
+```
+k-argmax_{p_r in R} d(p_q, p_r).
+```
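+
+To make the definition concrete, below is a minimal brute-force sketch of the
+`k = 1` case in C++ with Armadillo.  The matrices `refs` and `queries` and the
+random data are purely illustrative assumptions; as in mlpack, each column
+holds one point.
+
+```c++
+#include <armadillo>
+#include <iostream>
+
+int main()
+{
+  arma::mat refs(3, 100, arma::fill::randu);    // The reference set R.
+  arma::mat queries(3, 10, arma::fill::randu);  // The query set Q.
+
+  // For each query point, scan every reference point and keep the one with the
+  // largest Euclidean distance.
+  for (size_t q = 0; q < queries.n_cols; ++q)
+  {
+    double bestDist = -1.0;
+    size_t bestIndex = 0;
+    for (size_t r = 0; r < refs.n_cols; ++r)
+    {
+      const double dist = arma::norm(queries.col(q) - refs.col(r), 2);
+      if (dist > bestDist)
+      {
+        bestDist = dist;
+        bestIndex = r;
+      }
+    }
+    std::cout << "Furthest neighbor of query " << q << " is reference point "
+              << bestIndex << " (distance " << bestDist << ")." << std::endl;
+  }
+}
+```
+
+This exhaustive scan costs one distance computation per (query, reference)
+pair; the approximate methods below exist to avoid exactly that cost.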
+
+In order to solve this problem, mlpack provides a number of interfaces.
+
+ - two simple command-line executables to calculate approximate furthest
+   neighbors
+ - a simple C++ class for QDAFN
+ - a simple C++ class for DrusillaSelect
+ - a simple C++ class for tree-based and brute-force search
+
+## Which algorithm should be used?
+
+Each of the algorithms that mlpack implements for furthest neighbor search is
+suited to a different setting.  Below is some basic guidance on what should be
+used.  Note that the question of "which algorithm should be used" is a very
+difficult question to answer, so the guidance below is just
+that---guidance---and may not be right for a particular problem.
+
+ - `DrusillaSelect` is very fast and will perform extremely well for datasets
+   with outliers or datasets with structure (like low-dimensional datasets
+   embedded in high dimensions).
+ - `QDAFN` is a randomized approach and therefore should be well-suited for
+   datasets with little to no structure.
+ - The tree-based approaches (the `KFN` class and the `mlpack_kfn` program) are
+   best suited for low-dimensional datasets, and are most effective when very
+   small levels of approximation are desired, or when exact results are desired.
+ - Dual-tree search is most useful when the query set is large and structured
+   (like for all-furthest-neighbor search).
+ - Single-tree search is more useful when the query set is small.
+
+## Command-line `mlpack_approx_kfn` and `mlpack_kfn`
+
+mlpack provides two command-line programs to solve approximate furthest neighbor
+search:
+
+ - `mlpack_approx_kfn`, for the QDAFN and DrusillaSelect approaches
+ - `mlpack_kfn`, for exact and approximate tree-based approaches
+
+These two programs allow a large number of algorithms to be used to find
+approximate furthest neighbors.  Note that the `mlpack_kfn` program is also
+documented in the [KNN tutorial](knn.md) page, as it shares options with the
+`mlpack_knn` program.
+
+Below are several examples of how the `mlpack_approx_kfn` and `mlpack_kfn`
+programs might be used.  The first examples focus on the `mlpack_approx_kfn`
+program, and the last few show how `mlpack_kfn` can be used to produce
+approximate results.
+
+### Calculate 5 furthest neighbors with default options
+
+Here we have a query dataset `queries.csv` and a reference dataset `refs.csv`
+and we wish to find the 5 furthest neighbors of every query point in the
+reference dataset.  We may do that with the `mlpack_approx_kfn` program, using
+the default `DrusillaSelect` algorithm with its default parameters.
+
+```sh
+$ mlpack_approx_kfn -q queries.csv -r refs.csv -v -k 5 -n n.csv -d d.csv
+[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Building DrusillaSelect model...
+[INFO ] Model built.
+[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Searching for 5 furthest neighbors with DrusillaSelect...
+[INFO ] Search complete.
+[INFO ] Saving CSV data to 'n.csv'.
+[INFO ] Saving CSV data to 'd.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   algorithm: ds
+[INFO ]   calculate_error: false
+[INFO ]   distances_file: d.csv
+[INFO ]   exact_distances_file: ""
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 5
+[INFO ]   neighbors_file: n.csv
+[INFO ]   num_projections: 5
+[INFO ]   num_tables: 5
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: queries.csv
+[INFO ]   reference_file: refs.csv
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   drusilla_select_construct: 0.000342s
+[INFO ]   drusilla_select_search: 0.000780s
+[INFO ]   loading_data: 0.010689s
+[INFO ]   saving_data: 0.005585s
+[INFO ]   total_time: 0.018592s
+```
+
+Convenient timers for parts of the program operation are printed.  The results,
+saved in `n.csv` and `d.csv`, indicate the furthest neighbors and distances for
+each query point.  Each row of the output files corresponds to a single query
+point.  The neighbors are listed from furthest to nearest; so, the 4th
+element in the 3rd row of `d.csv` indicates the distance between the 3rd query
+point in `queries.csv` and its approximate 4th furthest neighbor.  Similarly,
+the same element in `n.csv` indicates the index of the approximate 4th furthest
+neighbor (with respect to `refs.csv`).
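+
+For instance, a quick way to inspect exactly that entry of `d.csv` (assuming
+the output files from the run above) is with `awk`:
+
+```sh
+# Print the 4th entry of the 3rd row of d.csv: the distance from the 3rd query
+# point to its approximate 4th furthest neighbor.
+$ awk -F, 'NR == 3 { print $4 }' d.csv
+```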
+
+### Specifying algorithm parameters for `DrusillaSelect`
+
+The `-p` (`--num_projections`) and `-t` (`--num_tables`) parameters affect the
+running of the `DrusillaSelect` algorithm and the QDAFN algorithm.
+Specifically, larger values for each of these parameters will search more
+possible candidate furthest neighbors and produce better results (at the cost of
+runtime).  More details on how each of these parameters works are available in
+the original papers, the mlpack source, or the documentation given by `--help`.
+
+In the example below, we run `DrusillaSelect` to find 4 furthest neighbors using
+10 tables and 2 points in each table, saving the neighbor indices to `n.csv` and
+the distances to `d.csv`.
+
+```sh
+$ mlpack_approx_kfn -q queries.csv -r refs.csv -v -k 4 -n n.csv -d d.csv -t 10 -p 2
+[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Building DrusillaSelect model...
+[INFO ] Model built.
+[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Searching for 4 furthest neighbors with DrusillaSelect...
+[INFO ] Search complete.
+[INFO ] Saving CSV data to 'n.csv'.
+[INFO ] Saving CSV data to 'd.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   algorithm: ds
+[INFO ]   calculate_error: false
+[INFO ]   distances_file: d.csv
+[INFO ]   exact_distances_file: ""
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 4
+[INFO ]   neighbors_file: n.csv
+[INFO ]   num_projections: 2
+[INFO ]   num_tables: 10
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: queries.csv
+[INFO ]   reference_file: refs.csv
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   drusilla_select_construct: 0.000645s
+[INFO ]   drusilla_select_search: 0.000551s
+[INFO ]   loading_data: 0.008518s
+[INFO ]   saving_data: 0.003734s
+[INFO ]   total_time: 0.014019s
+```
+
+### Using QDAFN instead of `DrusillaSelect`
+
+The algorithm to be used for approximate furthest neighbor search can be
+specified with the `--algorithm` (`-a`) option to the `mlpack_approx_kfn`
+program.  Below, we use the QDAFN algorithm instead of the default.  We leave
+the `-p` and `-t` options at their defaults---even though QDAFN often requires
+more tables and points to get the same quality of results.
+
+```sh
+$ mlpack_approx_kfn -q queries.csv -r refs.csv -v -k 3 -n n.csv -d d.csv -a qdafn
+[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Building QDAFN model...
+[INFO ] Model built.
+[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Searching for 3 furthest neighbors with QDAFN...
+[INFO ] Search complete.
+[INFO ] Saving CSV data to 'n.csv'.
+[INFO ] Saving CSV data to 'd.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   algorithm: qdafn
+[INFO ]   calculate_error: false
+[INFO ]   distances_file: d.csv
+[INFO ]   exact_distances_file: ""
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 3
+[INFO ]   neighbors_file: n.csv
+[INFO ]   num_projections: 5
+[INFO ]   num_tables: 5
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: queries.csv
+[INFO ]   reference_file: refs.csv
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   loading_data: 0.008380s
+[INFO ]   qdafn_construct: 0.003399s
+[INFO ]   qdafn_search: 0.000886s
+[INFO ]   saving_data: 0.002253s
+[INFO ]   total_time: 0.015465s
+```
+
+### Printing results quality with exact distances
+
+The `mlpack_approx_kfn` program can calculate the quality of the results if the
+`--calculate_error` (`-e`) flag is specified.  Below we use the program with its
+default parameters and calculate the error, which is displayed in the output.
+The error is only calculated for the furthest neighbor, not all k; therefore, in
+this example we have set `-k` to `1`.
+
+```sh
+$ mlpack_approx_kfn -q queries.csv -r refs.csv -v -k 1 -e
+[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Building DrusillaSelect model...
+[INFO ] Model built.
+[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Searching for 1 furthest neighbors with DrusillaSelect...
+[INFO ] Search complete.
+[INFO ] Calculating exact distances...
+[INFO ] 28891 node combinations were scored.
+[INFO ] 37735 base cases were calculated.
+[INFO ] Calculation complete.
+[INFO ] Average error: 1.08417.
+[INFO ] Maximum error: 1.28712.
+[INFO ] Minimum error: 1.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   algorithm: ds
+[INFO ]   calculate_error: true
+[INFO ]   distances_file: ""
+[INFO ]   exact_distances_file: ""
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 1
+[INFO ]   neighbors_file: ""
+[INFO ]   num_projections: 5
+[INFO ]   num_tables: 5
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: queries.csv
+[INFO ]   reference_file: refs.csv
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   computing_neighbors: 0.001476s
+[INFO ]   drusilla_select_construct: 0.000309s
+[INFO ]   drusilla_select_search: 0.000495s
+[INFO ]   loading_data: 0.008462s
+[INFO ]   total_time: 0.011670s
+[INFO ]   tree_building: 0.000202s
+```
+
+Note that the output includes three lines indicating the error:
+
+```sh
+[INFO ] Average error: 1.08417.
+[INFO ] Maximum error: 1.28712.
+[INFO ] Minimum error: 1.
+```
+
+In this case, a minimum error of 1 indicates an exact result, and over the
+entire query set the algorithm has returned a furthest neighbor candidate with
+maximum error 1.28712.
+
+### Using cached exact distances for quality results
+
+However, for large datasets, calculating the error may take a long time, because
+the exact furthest neighbors must be calculated.  Therefore, if the exact
+furthest neighbor distances are already known, they may be passed in with the
+`--exact_distances_file` (`-x`) option in order to avoid the calculation.  In
+the example below, we assume `exact.csv` contains the exact furthest neighbor
+distances.  We run the `qdafn` algorithm in this example.
+
+Note that the `-e` option must be specified for the `-x` option to have any effect.
+
+```sh
+$ mlpack_approx_kfn -q queries.csv -r refs.csv -k 1 -e -x exact.csv -n n.csv -v -a qdafn
+[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Building QDAFN model...
+[INFO ] Model built.
+[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Searching for 1 furthest neighbors with QDAFN...
+[INFO ] Search complete.
+[INFO ] Loading 'exact.csv' as raw ASCII formatted data.  Size is 1 x 1000.
+[INFO ] Average error: 1.06914.
+[INFO ] Maximum error: 1.67407.
+[INFO ] Minimum error: 1.
+[INFO ] Saving CSV data to 'n.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   algorithm: qdafn
+[INFO ]   calculate_error: true
+[INFO ]   distances_file: ""
+[INFO ]   exact_distances_file: exact.csv
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 1
+[INFO ]   neighbors_file: n.csv
+[INFO ]   num_projections: 5
+[INFO ]   num_tables: 5
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: queries.csv
+[INFO ]   reference_file: refs.csv
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   loading_data: 0.010348s
+[INFO ]   qdafn_construct: 0.000318s
+[INFO ]   qdafn_search: 0.000793s
+[INFO ]   saving_data: 0.000259s
+[INFO ]   total_time: 0.012254s
+```
+
+### Using tree-based approximation with `mlpack_kfn`
+
+The `mlpack_kfn` program allows specifying a desired approximation level with
+the `--epsilon` (`-e`) option.  The parameter must be greater than or equal to 0
+and less than 1.  A setting of 0 indicates exact search.
+
+The example below runs dual-tree furthest neighbor search (the default
+algorithm) with the approximation parameter set to 0.5.
+
+```sh
+$ mlpack_kfn -q queries.csv -r refs.csv -v -k 3 -e 0.5 -n n.csv -d d.csv
+[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Loaded reference data from 'refs.csv' (3x1000).
+[INFO ] Building reference tree...
+[INFO ] Tree built.
+[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Loaded query data from 'queries.csv' (3x1000).
+[INFO ] Searching for 3 neighbors with dual-tree kd-tree search...
+[INFO ] 1611 node combinations were scored.
+[INFO ] 13938 base cases were calculated.
+[INFO ] Search complete.
+[INFO ] Saving CSV data to 'n.csv'.
+[INFO ] Saving CSV data to 'd.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   algorithm: dual_tree
+[INFO ]   distances_file: d.csv
+[INFO ]   epsilon: 0.5
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 3
+[INFO ]   leaf_size: 20
+[INFO ]   naive: false
+[INFO ]   neighbors_file: n.csv
+[INFO ]   output_model_file: ""
+[INFO ]   percentage: 1
+[INFO ]   query_file: queries.csv
+[INFO ]   random_basis: false
+[INFO ]   reference_file: refs.csv
+[INFO ]   seed: 0
+[INFO ]   single_mode: false
+[INFO ]   tree_type: kd
+[INFO ]   true_distances_file: ""
+[INFO ]   true_neighbors_file: ""
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   computing_neighbors: 0.000442s
+[INFO ]   loading_data: 0.008060s
+[INFO ]   saving_data: 0.002850s
+[INFO ]   total_time: 0.012667s
+[INFO ]   tree_building: 0.000251s
+```
+
+Note that the format of the output files `d.csv` and `n.csv` is the same as
+for `mlpack_approx_kfn`.
+
+### Different algorithms with `mlpack_kfn`
+
+The `mlpack_kfn` program offers a large number of different algorithms that can
+be used.  The `--algorithm` (`-a`) parameter may be used to specify four main
+algorithm types: `naive` (brute-force search), `single_tree`
+(single-tree search), `dual_tree` (dual-tree search, the default), and `greedy`
+("defeatist" greedy search, which goes to one leaf node of the tree then
+terminates).  The example below uses single-tree search to find approximate
+neighbors with epsilon set to 0.1.
+
+```sh
+mlpack_kfn -q queries.csv -r refs.csv -v -k 3 -e 0.1 -n n.csv -d d.csv -a single_tree
+[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Loaded reference data from 'refs.csv' (3x1000).
+[INFO ] Building reference tree...
+[INFO ] Tree built.
+[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Loaded query data from 'queries.csv' (3x1000).
+[INFO ] Searching for 3 neighbors with single-tree kd-tree search...
+[INFO ] 13240 node combinations were scored.
+[INFO ] 15924 base cases were calculated.
+[INFO ] Search complete.
+[INFO ] Saving CSV data to 'n.csv'.
+[INFO ] Saving CSV data to 'd.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   algorithm: single_tree
+[INFO ]   distances_file: d.csv
+[INFO ]   epsilon: 0.1
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 3
+[INFO ]   leaf_size: 20
+[INFO ]   naive: false
+[INFO ]   neighbors_file: n.csv
+[INFO ]   output_model_file: ""
+[INFO ]   percentage: 1
+[INFO ]   query_file: queries.csv
+[INFO ]   random_basis: false
+[INFO ]   reference_file: refs.csv
+[INFO ]   seed: 0
+[INFO ]   single_mode: false
+[INFO ]   tree_type: kd
+[INFO ]   true_distances_file: ""
+[INFO ]   true_neighbors_file: ""
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   computing_neighbors: 0.000850s
+[INFO ]   loading_data: 0.007858s
+[INFO ]   saving_data: 0.003445s
+[INFO ]   total_time: 0.013084s
+[INFO ]   tree_building: 0.000250s
+```
+
+### Saving a model for later use
+
+The `mlpack_approx_kfn` and `mlpack_kfn` programs both allow models to be saved
+and loaded for future use.  The `--output_model_file` (`-M`) option allows
+specifying where to save a model, and the `--input_model_file` (`-m`) option
+allows a model to be loaded instead of trained.  So, if you specify
+`--input_model_file` then you do not need to specify `--reference_file` (`-r`),
+`--num_projections` (`-p`), or `--num_tables` (`-t`).
+
+The example below saves a model with 10 projections and 5 tables.  Note that
+neither `--query_file` (`-q`) nor `-k` are specified; this run only builds the
+model and saves it to `model.bin`.
+
+```sh
+$ mlpack_approx_kfn -r refs.csv -t 5 -p 10 -v -M model.bin
+[INFO ] Loading 'refs.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Building DrusillaSelect model...
+[INFO ] Model built.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   algorithm: ds
+[INFO ]   calculate_error: false
+[INFO ]   distances_file: ""
+[INFO ]   exact_distances_file: ""
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 0
+[INFO ]   neighbors_file: ""
+[INFO ]   num_projections: 10
+[INFO ]   num_tables: 5
+[INFO ]   output_model_file: model.bin
+[INFO ]   query_file: ""
+[INFO ]   reference_file: refs.csv
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   drusilla_select_construct: 0.000321s
+[INFO ]   loading_data: 0.004700s
+[INFO ]   total_time: 0.007320s
+```
+
+Now, with the model saved, we can run approximate furthest neighbor search on a
+query set using the saved model:
+
+```sh
+$ mlpack_approx_kfn -m model.bin -q queries.csv -k 3 -d d.csv -n n.csv -v
+[INFO ] Loading 'queries.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Searching for 3 furthest neighbors with DrusillaSelect...
+[INFO ] Search complete.
+[INFO ] Saving CSV data to 'n.csv'.
+[INFO ] Saving CSV data to 'd.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   algorithm: ds
+[INFO ]   calculate_error: false
+[INFO ]   distances_file: d.csv
+[INFO ]   exact_distances_file: ""
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: model.bin
+[INFO ]   k: 3
+[INFO ]   neighbors_file: n.csv
+[INFO ]   num_projections: 5
+[INFO ]   num_tables: 5
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: queries.csv
+[INFO ]   reference_file: ""
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   drusilla_select_search: 0.000878s
+[INFO ]   loading_data: 0.004599s
+[INFO ]   saving_data: 0.003006s
+[INFO ]   total_time: 0.009234s
+```
+
+These options work in the same way for both the `mlpack_approx_kfn` and
+`mlpack_kfn` programs.
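+
+As a sketch of the same save-and-reuse pattern with `mlpack_kfn` (output
+omitted; the model filename `kfn-model.bin` is just an illustrative choice):
+
+```sh
+# Build a tree on the reference set and save the model.
+$ mlpack_kfn -r refs.csv -v -M kfn-model.bin
+
+# Load the saved model and find the 3 furthest neighbors of each query point.
+$ mlpack_kfn -m kfn-model.bin -q queries.csv -k 3 -n n.csv -d d.csv -v
+```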
+
+### Final command-line program notes
+
+Both the `mlpack_kfn` and `mlpack_approx_kfn` programs contain numerous options
+not fully documented in these short examples.  You can run each program with the
+`--help` (`-h`) option for more information.
+
+## `DrusillaSelect` C++ class
+
+mlpack provides a simple `DrusillaSelect` C++ class that can be used inside of
+C++ programs to perform approximate furthest neighbor search.  The class has
+only one template parameter---`MatType`---which specifies the type of matrix to
+be used.  That means the class can be used with either dense data (of type
+`arma::mat`) or sparse data (of type `arma::sp_mat`).
+
+The following examples show simple usage of this class.
+
+### Approximate furthest neighbors with defaults
+
+The code below builds a `DrusillaSelect` model with default options on the
+matrix `dataset`, then queries for the approximate furthest neighbor of every
+point in the `queries` matrix.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::mat dataset;
+// The query set.
+extern arma::mat queries;
+
+// Construct the model with defaults.
+DrusillaSelect<> ds(dataset);
+
+// Query the model, putting output into the following two matrices.
+arma::mat distances;
+arma::Mat<size_t> neighbors;
+ds.Search(queries, 1, neighbors, distances);
+```
+
+At the end of this code, both the `distances` and `neighbors` matrices will have
+as many columns as the `queries` matrix.  So, each column of the `distances` and
+`neighbors` matrices holds the distances or neighbors of the corresponding
+column in the `queries` matrix.
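+
+Continuing the example above, the single approximate furthest neighbor of the
+first query point could be read out as in the short sketch below.
+
+```c++
+// Column 0 of the outputs corresponds to column 0 of `queries`; row 0 holds
+// the first (here, the only) furthest neighbor found for that query point.
+const size_t neighborIndex = neighbors(0, 0);
+const double neighborDist = distances(0, 0);
+std::cout << "Furthest neighbor of query 0: reference point " << neighborIndex
+          << ", at distance " << neighborDist << "." << std::endl;
+```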
+
+### Custom numbers of tables and projections
+
+The following example constructs a `DrusillaSelect` model with 10 tables and 5
+projections.  Once that is done it performs the same task as the previous
+example.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::mat dataset;
+// The query set.
+extern arma::mat queries;
+
+// Construct the model with custom parameters.
+DrusillaSelect<> ds(dataset, 10, 5);
+
+// Query the model, putting output into the following two matrices.
+arma::mat distances;
+arma::Mat<size_t> neighbors;
+ds.Search(queries, 1, neighbors, distances);
+```
+
+### Accessing the candidate set
+
+The `DrusillaSelect` algorithm merely scans the reference set and extracts a
+number of points that will be queried in a brute-force fashion when the
+`Search()` method is called.  We can access this set with the `CandidateSet()`
+method.  The code below prints the fifth point of the candidate set.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::mat dataset;
+
+// Construct the model with custom parameters.
+DrusillaSelect<> ds(dataset, 10, 5);
+
+// Print the fifth point of the candidate set.
+std::cout << ds.CandidateSet().col(4).t();
+```
+
+### Retraining on a new reference set
+
+It is possible to retrain a `DrusillaSelect` model with new parameters or with a
+new reference set.  This is functionally equivalent to creating a new model.
+The example code below creates a first `DrusillaSelect` model using 3 tables
+and 10 projections, and then retrains this with the same reference set using 10
+tables and 3 projections.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::mat dataset;
+
+// Construct the model with initial parameters.
+DrusillaSelect<> ds(dataset, 3, 10);
+
+// Now retrain with different parameters.
+ds.Train(dataset, 10, 3);
+```
+
+### Running on sparse data
+
+We can set the template parameter for `DrusillaSelect` to `arma::sp_mat` in
+order to perform furthest neighbor search on sparse data.  This code below
+creates a `DrusillaSelect` model using 4 tables and 6 projections with sparse
+input data, then searches for 3 approximate furthest neighbors.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::sp_mat dataset;
+// The query dataset.
+extern arma::sp_mat querySet;
+
+// Construct the model on sparse data.
+DrusillaSelect<arma::sp_mat> ds(dataset, 4, 6);
+
+// Search on query data.
+arma::Mat<size_t> neighbors;
+arma::mat distances;
+ds.Search(querySet, 3, neighbors, distances);
+```
+
+## QDAFN C++ class
+
+mlpack also provides a standalone simple `QDAFN` class for furthest neighbor
+search.  The API for this class is virtually identical to the `DrusillaSelect`
+class, and also has one template parameter to specify the type of matrix to be
+used (dense or sparse or other).
+
+The following subsections demonstrate usage of the `QDAFN` class in the same way
+as the previous section's examples for `DrusillaSelect`.
+
+### Approximate furthest neighbors with defaults
+
+The code below builds a `QDAFN` model with default options on the matrix
+`dataset`, then queries for the approximate furthest neighbor of every point in
+the `queries` matrix.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::mat dataset;
+// The query set.
+extern arma::mat queries;
+
+// Construct the model with defaults.
+QDAFN<> qd(dataset);
+
+// Query the model, putting output into the following two matrices.
+arma::mat distances;
+arma::Mat<size_t> neighbors;
+qd.Search(queries, 1, neighbors, distances);
+```
+
+At the end of this code, both the `distances` and `neighbors` matrices will have
+as many columns as the `queries` matrix.  So, each column of the `distances` and
+`neighbors` matrices holds the distances or neighbors of the corresponding
+column in the `queries` matrix.
+
+### Custom numbers of tables and projections
+
+The following example constructs a `QDAFN` model with 15 tables and 30
+projections.  Once that is done it performs the same task as the previous
+example.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::mat dataset;
+// The query set.
+extern arma::mat queries;
+
+// Construct the model with custom parameters.
+QDAFN<> qdafn(dataset, 15, 30);
+
+// Query the model, putting output into the following two matrices.
+arma::mat distances;
+arma::Mat<size_t> neighbors;
+qdafn.Search(queries, 1, neighbors, distances);
+```
+
+### Accessing the candidate set
+
+The `QDAFN` algorithm scans the reference set, extracting points that have been
+projected onto random directions.  Each random direction corresponds to a single
+table.  The `QDAFN` class stores these points as a vector of matrices, which can
+be accessed with the `CandidateSet()` method.  The code below prints the fifth
+point of the candidate set of the third table.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::mat dataset;
+
+// Construct the model with custom parameters.
+QDAFN<> qdafn(dataset, 10, 5);
+
+// Print the fifth point of the candidate set of the third table.
+std::cout << qdafn.CandidateSet(2).col(4).t();
+```
+
+### Retraining on a new reference set
+
+It is possible to retrain a `QDAFN` model with new parameters or with a new
+reference set.  This is functionally equivalent to creating a new model.  The
+example code below creates a first `QDAFN` model using 3 tables and 10
+projections, and then retrains this with the same reference set using 10 tables
+and 3 projections.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::mat dataset;
+
+// Construct the model with initial parameters.
+QDAFN<> qdafn(dataset, 3, 10);
+
+// Now retrain with different parameters.
+qdafn.Train(dataset, 10, 3);
+```
+
+### Running on sparse data
+
+We can set the template parameter for `QDAFN` to `arma::sp_mat` in order to
+perform furthest neighbor search on sparse data.  The code below creates a
+`QDAFN` model using 20 tables and 60 projections with sparse input data, then
+searches for 3 approximate furthest neighbors.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::sp_mat dataset;
+// The query dataset.
+extern arma::sp_mat querySet;
+
+// Construct the model on sparse data.
+QDAFN<arma::sp_mat> qdafn(dataset, 20, 60);
+
+// Search on query data.
+arma::Mat<size_t> neighbors;
+arma::mat distances;
+qdafn.Search(querySet, 3, neighbors, distances);
+```
+
+## KFN C++ class
+
+The extensive `NeighborSearch` class also provides a way to search for
+approximate furthest neighbors using a different, tree-based technique.  For
+full documentation on this class, see the [NeighborSearch
+tutorial](nstutorial.md).  The `KFN` class is a convenient typedef of the
+`NeighborSearch` class that can be used to perform the furthest neighbors task
+with `kd`-trees.
+
+In the following subsections, the `KFN` class is used in short code examples.
+
+### Simple furthest neighbors example
+
+The `KFN` class has construction semantics similar to `DrusillaSelect` and
+`QDAFN`.  The example below constructs a `KFN` object (which will build the
+tree on the reference set), but note that the third parameter to the constructor
+allows us to specify our desired level of approximation.  In this example we
+choose `epsilon = 0.05`.  Then, the code searches for 3 approximate furthest
+neighbors.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset.
+extern arma::mat dataset;
+// The query set.
+extern arma::mat querySet;
+
+// Construct the object, performing the default dual-tree search with
+// approximation level epsilon = 0.05.
+KFN kfn(dataset, DUAL_TREE_MODE, 0.05);
+
+// Search for approximate furthest neighbors.
+arma::Mat<size_t> neighbors;
+arma::mat distances;
+kfn.Search(querySet, 3, neighbors, distances);
+```
+
+### Retraining on a new reference set
+
+Like the `QDAFN` and `DrusillaSelect` classes, the `KFN` class is capable of
+retraining on a new reference set.  The code below demonstrates this.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The original reference set we train on.
+extern arma::mat dataset;
+// The new reference set we retrain on.
+extern arma::mat newDataset;
+
+// Construct the object with approximation level 0.1.
+KFN kfn(dataset, DUAL_TREE_MODE, 0.1);
+
+// Retrain on the new reference set.
+kfn.Train(newDataset);
+```
+
+### Searching in single-tree mode
+
+The particular mode to be used in search can be specified in the constructor.
+In this example, we use single-tree search (as opposed to the default of
+dual-tree search).
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference set.
+extern arma::mat dataset;
+// The query set.
+extern arma::mat querySet;
+
+// Construct the object with approximation level 0.25 and in single tree search
+// mode.
+KFN kfn(dataset, SINGLE_TREE_MODE, 0.25);
+
+// Search for 5 approximate furthest neighbors.
+arma::Mat<size_t> neighbors;
+arma::mat distances;
+kfn.Search(querySet, 5, neighbors, distances);
+```
+
+### Searching in brute-force mode
+
+If desired, brute-force search ("naive search") can be used to find the furthest
+neighbors; however, the result will not be approximate---it will be exact (since
+every possibility will be considered).  The code below performs exact furthest
+neighbor search by using the `KFN` class in brute-force mode.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference set.
+extern arma::mat dataset;
+// The query set.
+extern arma::mat querySet;
+
+// Construct the object in brute-force mode.  We can leave the approximation
+// parameter at its default (0) since brute-force will provide exact results.
+KFN kfn(dataset, NAIVE_MODE);
+
+// Perform the search for 2 furthest neighbors.
+arma::Mat<size_t> neighbors;
+arma::mat distances;
+kfn.Search(querySet, 2, neighbors, distances);
+```
+
+## Further documentation
+
+For further documentation on the approximate furthest neighbor facilities
+offered by mlpack, see also [the NeighborSearch tutorial](nstutorial.md).  Each
+class (`QDAFN`, `DrusillaSelect`, `NeighborSearch`) is also well-documented, and
+more details can be found in the source code documentation.
diff -pruN 3.4.2-7/doc/tutorials/cf/cf.txt 4.0.1-1/doc/tutorials/cf/cf.txt
--- 3.4.2-7/doc/tutorials/cf/cf.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/cf/cf.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,472 +0,0 @@
-/*!
-
-@file cf.txt
-@author Ryan Curtin
-@brief Tutorial for how to use the CF class and program.
-
-@page cftutorial Collaborative filtering tutorial
-
-@section intro_cftut Introduction
-
-Collaborative filtering is an increasingly popular approach for recommender
-systems.  A typical formulation of the problem is as follows: there are \f$n\f$
-users and \f$m\f$ items, and each user has rated some of the items.  We want to
-provide each user with a recommendation for an item they have not rated yet,
-which they are likely to rate highly.  In another formulation, we may want to
-predict a user's rating of an item.  This type of problem has been considered
-extensively, especially in the context of the Netflix prize.  The winning
-approach for the Netflix prize was a collaborative filtering approach which
-utilized matrix decomposition.  More information on their approach can be found
-in the following paper:
-
-@code
-@article{koren2009matrix,
-  title={Matrix factorization techniques for recommender systems},
-  author={Koren, Yehuda and Bell, Robert and Volinsky, Chris},
-  journal={Computer},
-  number={8},
-  pages={30--37},
-  year={2009},
-  publisher={IEEE}
-}
-@endcode
-
-The key to this approach is that the data is represented as an incomplete matrix
-\f$V \in \Re^{n \times m}\f$, where \f$V_{ij}\f$ represents user \f$i\f$'s
-rating of item \f$j\f$, if that rating exists.  The task, then, is to complete
-the entries of the matrix.
-
-In the matrix factorization framework, the matrix \f$V\f$ is assumed to be
-low-rank and decomposed into components as \f$V \approx WH\f$ according to some
-heuristic.
-
-In order to solve problems of this form, \b mlpack provides:
-
- - a \ref cli_cftut "simple command-line interface" to perform collaborative filtering
- - a \ref cf_cftut "simple C++ interface" to perform collaborative filtering
- - an \ref cpp_cftut "extensible C++ interface" for implementing new collaborative filtering techniques
-
-@section toc_cftut Table of Contents
-
- - \ref intro_cftut
- - \ref toc_cftut
- - \ref cli_cftut
-   - \ref cli_input_format
-   - \ref ex1_cf_cli
-   - \ref ex1a_cf_cli
-   - \ref ex1b_cf_cli
-   - \ref ex2_cf_cli
-   - \ref ex3_cf_cli
-   - \ref ex4_cf_cli
-   - \ref ex5_cf_cli
- - \ref cf_cftut
-   - \ref ex1_cf_cpp
-   - \ref ex2_cf_cpp
-   - \ref ex3_cf_cpp
-   - \ref ex4_cf_cpp
- - \ref cpp_cftut
- - \ref further_doc_cftut
-
-@section cli_cftut The 'mlpack_cf' program
-
-\b mlpack provides a command-line program, \c mlpack_cf, which is used to
-perform collaborative filtering on a given dataset.  It can provide
-neighborhood-based recommendations for users.  The algorithm used for matrix
-factorization is configurable, and the parameters of each algorithm are also
-configurable.
-
-The following examples detail usage of the \c mlpack_cf program.  Note that you
-can get documentation on all the possible parameters by typing:
-
-@code
-$ mlpack_cf --help
-@endcode
-
-@subsection cli_input_format Input format for mlpack_cf
-
-The input file for the \c mlpack_cf program is specified with the \c
---training_file or \c -t option.  This file is a coordinate-format sparse
-matrix, similar to the Matrix Market (MM) format.  The first coordinate is the
-user id; the second coordinate is the item id; and the third coordinate is the
-rating.  So, for instance, a dataset with 3 users and 2 items, and ratings
-between 1 and 5, might look like the following:
-
-@code
-$ cat dataset.csv
-0, 1, 4
-1, 0, 5
-1, 1, 1
-2, 0, 2
-@endcode
-
-This dataset has four ratings: user 0 has rated item 1 with a rating of 4; user
-1 has rated item 0 with a rating of 5; user 1 has rated item 1 with a rating of
-1; and user 2 has rated item 0 with a rating of 2.  Note that the user and item
-indices start from 0, and the identifiers must be numeric indices, and not
-names.
-
-The file does not necessarily need to be a CSV; it can be any supported storage
-format, assuming that it is a coordinate-format file in the format specified
-above.  For more information on mlpack file formats, see the documentation for
-mlpack::data::Load().
-
-@subsection ex1_cf_cli mlpack_cf with default parameters
-
-In this example, we have a dataset from MovieLens, and we want to use
-\c mlpack_cf with the default parameters, which will provide 5 recommendations
-for each user, and we wish to save the results in the file
-\c recommendations.csv.  Assuming that our dataset is in the file
-\c MovieLens-100k.csv and it is in the correct format, we may use the
-\c mlpack_cf executable as below:
-
-@code
-$ mlpack_cf -t MovieLens-100k.csv -v -o recommendations.csv
-@endcode
-
-The \c -v option provides verbose output, and may be omitted if desired.  Now,
-for each user, we have recommendations in \c recommendations.csv:
-
-@code
-$ head recommendations.csv
-317,422,482,356,495
-116,120,180,6,327
-312,49,116,99,236
-312,116,99,236,285
-55,190,317,194,63
-171,209,180,175,95
-208,0,94,87,57
-99,97,0,203,172
-257,99,180,287,0
-171,203,172,209,88
-@endcode
-
-So, for user 0, the top 5 recommended items that user 0 has not rated are items
-317, 422, 482, 356, and 495.  For user 5, the recommendations are on the sixth
-line: 171, 209, 180, 175, 95.
-
-The \c mlpack_cf program can be built into a larger recommendation framework,
-with a preprocessing step that can turn user information and item information
-into numeric IDs, and a postprocessing step that can map these numeric IDs back
-to the original information.
-
-@subsection ex1a_cf_cli Saving mlpack_cf models
-
-The \c mlpack_cf program is able to save a particular model for later loading.
-Saving a model can be done with the \c --output_model_file or \c -M option.  The
-example below builds a CF model on the \c MovieLens-100k.csv dataset, and then
-saves the model to the file \c cf-model.xml for later usage.
-
-@code
-$ mlpack_cf -t MovieLens-100k.csv -M cf-model.xml -v
-@endcode
-
-The models can also be saved as \c .bin or \c .txt; the \c .xml format provides
-a human-inspectable format (though the models tend to be quite complex and may
-be difficult to read).  These models can then be re-used to provide specific
-recommendations for certain users, or other tasks.
-
-@subsection ex1b_cf_cli Loading mlpack_cf models
-
-Instead of training a model, the \c mlpack_cf program can also load a model to
-provide recommendations, using the \c --input_model_file or \c -m option.  For
-instance, the example below will load the model from \c cf-model.xml and then
-generate 3 recommendations for each user in the dataset, saving the results to
-\c recommendations.csv.
-
-@code
-$ mlpack_cf -m cf-model.xml -v -o recommendations.csv
-@endcode
-
-@subsection ex2_cf_cli Specifying rank of mlpack_cf decomposition
-
-By default, the matrix factorizations in the \c mlpack_cf program decompose the
-data matrix into two matrices \f$W\f$ and \f$H\f$ with rank two.  Often, this
-default parameter is not correct, and it makes sense to use a higher-rank
-decomposition.  The rank can be specified with the \c --rank or \c -R parameter:
-
-@code
-$ mlpack_cf -t MovieLens-100k.csv -R 10 -v
-@endcode
-
-In the example above, the data matrix will be decomposed into two matrices of
-rank 10.  In general, higher-rank decompositions will take longer, but will give
-more accurate predictions.
-
-@subsection ex3_cf_cli mlpack_cf with single-user recommendation
-
-In the previous two examples, the output file \c recommendations.csv contains
-one line for each user in the input dataset.  But often, recommendations may
-only be desired for a few users.  In that case, we can assemble a file of query
-users, with one user per line:
-
-@code
-$ cat query.csv
-0
-17
-31
-@endcode
-
-Now, if we run the \c mlpack_cf executable with this query file, we will obtain
-recommendations for users 0, 17, and 31:
-
-@code
-$ mlpack_cf -i MovieLens-100k.csv -R 10 -q query.csv -o recommendations.csv
-$ cat recommendations.csv
-474,356,317,432,473
-510,172,204,483,182
-0,120,236,257,126
-@endcode
-
-@subsection ex4_cf_cli mlpack_cf with non-default factorizer
-
-The \c --algorithm (or \c -a ) parameter controls the factorizer that is used.
-Several options are available:
-
- - \c 'NMF': non-negative matrix factorization; see mlpack::amf::AMF<>
- - \c 'SVDBatch': SVD batch factorization
- - \c 'SVDIncompleteIncremental': incomplete incremental SVD
- - \c 'SVDCompleteIncremental': complete incremental SVD
- - \c 'RegSVD': regularized SVD; see mlpack::svd::RegularizedSVD
-
-The default factorizer is \c 'NMF'.  The example below uses the 'RegSVD'
-factorizer:
-
-@code
-$ mlpack_cf -i MovieLens-100k.csv -R 10 -q query.csv -a RegSVD -o recommendations.csv
-@endcode
-
-@subsection ex5_cf_cli mlpack_cf with non-default neighborhood size
-
-The \c mlpack_cf program produces recommendations using a neighborhood: similar
-users in the query user's neighborhood will be averaged to produce predictions.
-The size of this neighborhood is controlled with the \c --neighborhood (or \c -n
-) option.  An example using a neighborhood with 10 similar users is below:
-
-@code
-$ mlpack_cf -i MovieLens-100k.csv -R 10 -q query.csv -a RegSVD -n 10
-@endcode
-
-@section cf_cftut The 'CF' class
-
-The \c CF class in \b mlpack offers a simple, flexible API for performing
-collaborative filtering for recommender systems within C++ applications.  In the
-constructor, the \c CF class takes a coordinate-list dataset and decomposes the
-matrix according to the specified \c FactorizerType template parameter.
-
-Then, the \c GetRecommendations() function may be called to obtain
-recommendations for certain users (or all users), and the \c W() and \c H()
-matrices may be accessed to perform other computations.
-
-The data which the \c CF constructor takes should be an Armadillo matrix (\c
-arma::mat ) with three rows.  The first row corresponds to users; the second
-row corresponds to items; the third column corresponds to the rating.  This is a
-coordinate list format, like the format the \c cf executable takes.  The
-data::Load() function can be used to load data.
-
-The following examples detail a few ways that the \c CF class can be used.
-
-@subsection ex1_cf_cpp CF with default parameters
-
-This example constructs the \c CF object with default parameters and obtains
-recommendations for each user, storing the output in the \c recommendations
-matrix.
-
-@code
-#include <mlpack/methods/cf/cf.hpp>
-
-using namespace mlpack::cf;
-
-// The coordinate list of ratings that we have.
-extern arma::mat data;
-// The size of the neighborhood to use to get recommendations.
-extern size_t neighborhood;
-// The rank of the decomposition.
-extern size_t rank;
-
-// Build the CF object and perform the decomposition.
-// The constructor takes a default-constructed factorizer, which, by default,
-// is of type amf::NMFALSFactorizer.
-CF cf(data, amf::NMFALSFactorizer(), neighborhood, rank);
-
-// Store the results in this object.
-arma::Mat<size_t> recommendations;
-
-// Get 5 recommendations for all users.
-cf.GetRecommendations(5, recommendations);
-@endcode
-
-@subsection ex2_cf_cpp CF with other factorizers
-
-\b mlpack provides a number of existing factorizers which can be used in place
-of the default mlpack::amf::NMFALSFactorizer (which is non-negative matrix
-factorization with alternating least squares update rules).  These include:
-
- - mlpack::amf::SVDBatchFactorizer
- - mlpack::amf::SVDCompleteIncrementalFactorizer
- - mlpack::amf::SVDIncompleteIncrementalFactorizer
- - mlpack::amf::NMFALSFactorizer
- - mlpack::svd::RegularizedSVD
- - mlpack::svd::QUIC_SVD
-
-The amf::AMF<> class has many other possibilities than those listed here; it is
-a framework for alternating matrix factorization techniques.  See the
-\ref mlpack::amf::AMF<> "class documentation" or \ref amftutorial "tutorial on AMF" for
-more information.
-
-The use of another factorizer is straightforward; the example from the previous
-section is adapted below to use svd::RegularizedSVD:
-
-@code
-#include <mlpack/methods/cf/cf.hpp>
-#include <mlpack/methods/regularized_svd/regularized_svd.hpp>
-
-using namespace mlpack::cf;
-
-// The coordinate list of ratings that we have.
-extern arma::mat data;
-// The size of the neighborhood to use to get recommendations.
-extern size_t neighborhood;
-// The rank of the decomposition.
-extern size_t rank;
-
-// Build the CF object and perform the decomposition.
-CF cf(data, svd::RegularizedSVD(), neighborhood, rank);
-
-// Store the results in this object.
-arma::Mat<size_t> recommendations;
-
-// Get 5 recommendations for all users.
-cf.GetRecommendations(5, recommendations);
-@endcode
-
-@subsection ex3_cf_cpp Predicting individual user/item ratings
-
-The \c Predict() method can be used to predict the rating of an item by a
-certain user, using the same neighborhood-based approach as the
-\c GetRecommendations() function or the \c cf executable.  Below is an example
-of the use of that function.
-
-The example below will obtain the predicted rating for item 50 by user 12.
-
-@code
-#include <mlpack/methods/cf/cf.hpp>
-
-using namespace mlpack::cf;
-
-// The coordinate list of ratings that we have.
-extern arma::mat data;
-// The size of the neighborhood to use to get recommendations.
-extern size_t neighborhood;
-// The rank of the decomposition.
-extern size_t rank;
-
-// Build the CF object and perform the decomposition.
-// The constructor takes a default-constructed factorizer, which, by default,
-// is of type amf::NMFALSFactorizer.
-CF cf(data, amf::NMFALSFactorizer(), neighborhood, rank);
-
-const double prediction = cf.Predict(12, 50); // User 12, item 50.
-@endcode
-
-@subsection ex4_cf_cpp Other operations with the W and H matrices
-
-Sometimes, the raw decomposed W and H matrices can be useful.  The example below
-obtains these matrices, and multiplies them against each other to obtain a
-reconstructed data matrix with no missing values.
-
-@code
-#include <mlpack/methods/cf/cf.hpp>
-
-using namespace mlpack::cf;
-
-// The coordinate list of ratings that we have.
-extern arma::mat data;
-// The size of the neighborhood to use to get recommendations.
-extern size_t neighborhood;
-// The rank of the decomposition.
-extern size_t rank;
-
-// Build the CF object and perform the decomposition.
-// The constructor takes a default-constructed factorizer, which, by default,
-// is of type amf::NMFALSFactorizer.
-CF cf(data, amf::NMFALSFactorizer(), neighborhood, rank);
-
-// References to W and H matrices.
-const arma::mat& W = cf.W();
-const arma::mat& H = cf.H();
-
-// Multiply the matrices together.
-arma::mat reconstructed = W * H;
-@endcode
-
-@section cpp_cftut Template parameters for the 'CF' class
-
-The \c CF class takes the \c FactorizerType as a template parameter to some of
-its constructors and to the \c Train() function.  The \c FactorizerType class
-defines the algorithm used for matrix factorization.  There are a number of
-existing factorizers that can be used in \b mlpack; these were detailed in the
-\ref ex2_cf_cpp "'other factorizers' example" of the previous section.
-
-The \c FactorizerType class must implement one of the two following methods:
-
- - <tt>Apply(arma::mat& data, const size_t rank, arma::mat& W, arma::mat&
-   H);</tt>
- - <tt>Apply(arma::sp_mat& data, const size_t rank, arma::mat& W, arma::mat&
-   H);</tt>
-
-The difference between these two methods is whether \c arma::mat or \c
-arma::sp_mat is used as input.  If \c arma::mat is used, then the data matrix is
-a coordinate list with three columns, as in the constructor to the \c CF class.
-If \c arma::sp_mat is used, then a sparse matrix is passed with the number of
-rows equal to the number of items and the number of columns equal to the number
-of users, and each nonzero element in the matrix corresponds to a non-missing
-rating.
-
-The method that the factorizer implements is specified via the \c
-FactorizerTraits class, which is a template metaprogramming traits class:
-
-@code
-template<typename FactorizerType>
-struct FactorizerTraits
-{
-  /**
-   * If true, then the passed data matrix is used for factorizer.Apply().
-   * Otherwise, it is modified into a form suitable for factorization.
-   */
-  static const bool UsesCoordinateList = false;
-};
-@endcode
-
-If \c FactorizerTraits<MyFactorizer>::UsesCoordinateList is \c true, then \c CF
-will try to call \c Apply() with an \c arma::mat object.  Otherwise, \c CF will
-try to call \c Apply() with an \c arma::sp_mat object.  Specifying the value of
-\c UsesCoordinateList is straightforward; provide this specialization of the
-\c FactorizerTraits class:
-
-@code
-template<>
-struct FactorizerTraits<MyFactorizer>
-{
-  static const bool UsesCoordinateList = true; // Set your value here.
-};
-@endcode
-
-The \c Apply() function also takes a reference to the matrices \c W and \c H.
-When the \c Apply() function returns, the input data matrix should be decomposed
-into these two matrices.  \c W should have number of rows equal to the number of
-items and number of columns equal to the \c rank parameter, and \c H should have
-number of rows equal to the \c rank parameter, and number of columns equal to
-the number of users.
-
-The \ref mlpack::amf::AMF<> "amf::AMF<> class" can be used as a base for
-factorizers that alternate between updating \c W and updating \c H.  A useful
-reference is the \ref amftutorial "AMF tutorial".
-
-@section further_doc_cftut Further documentation
-
-Further documentation for the \c CF class may be found in the \ref
-mlpack::cf "complete API documentation".  In addition, more information on
-the \c AMF class of factorizers may be found in its \ref mlpack::amf::AMF<>
-"complete API documentation".
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/cf.md 4.0.1-1/doc/tutorials/cf.md
--- 3.4.2-7/doc/tutorials/cf.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/cf.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,437 @@
+# Collaborative Filtering Tutorial
+
+Collaborative filtering is an increasingly popular approach for recommender
+systems.  A typical formulation of the problem is as follows: there are `n`
+users and `m` items, and each user has rated some of the items.  We want to
+provide each user with a recommendation for an item they have not rated yet,
+which they are likely to rate highly.  In another formulation, we may want to
+predict a user's rating of an item.  This type of problem has been considered
+extensively, especially in the context of the Netflix prize.  The winning
+approach for the Netflix prize was a collaborative filtering method which
+utilized matrix decomposition.  More information on their approach can be found
+in the following paper:
+
+```
+@article{koren2009matrix,
+  title={Matrix factorization techniques for recommender systems},
+  author={Koren, Yehuda and Bell, Robert and Volinsky, Chris},
+  journal={Computer},
+  number={8},
+  pages={30--37},
+  year={2009},
+  publisher={IEEE}
+}
+```
+
+The key to this approach is that the data is represented as an incomplete matrix
+`V` with size `n x m`, where `V_ij` represents user `i`'s rating of item `j`, if
+that rating exists.  The task, then, is to complete the entries of the matrix.
+
+In the matrix factorization framework, the matrix `V` is assumed to be low-rank
+and decomposed into components as `V ~ WH` according to some heuristic.
+
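+Concretely, if `W` is `n x r` and `H` is `r x m` for some small rank `r`, each
+individual rating is approximated by an inner product; in LaTeX notation:
+
+```
+V_{ij} \approx \sum_{k=1}^{r} W_{ik} H_{kj}
+```
+
+Completing the matrix then amounts to choosing `W` and `H` so that this
+approximation is good on the known entries, and reading predictions for the
+unknown entries off the product `W H`.
+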
+In order to solve problems of this form, mlpack provides both an easy-to-use
+binding (detailed here as a command-line program), and a simple yet flexible C++
+API that allows the implementation of new collaborative filtering techniques.
+
+## The `mlpack_cf` program
+
+mlpack provides a command-line program, `mlpack_cf`, which is used to perform
+collaborative filtering on a given dataset.  It can provide neighborhood-based
+recommendations for users.  The algorithm used for matrix factorization is
+configurable, and the parameters of each algorithm are also configurable.  *Note
+that mlpack also provides the `cf()` function in other languages; however,
+this tutorial focuses on the command-line program `mlpack_cf`.  It is easy to
+adapt each example to other languages, though.*
+
+The following examples detail usage of the `mlpack_cf` program.  Note that you
+can get documentation on all the possible parameters by typing:
+
+```sh
+$ mlpack_cf --help
+```
+
+### Input format for `mlpack_cf`
+
+The input file for the `mlpack_cf` program is specified with the
+`--training_file` or `-t` option.  This file is a coordinate-format sparse
+matrix, similar to the Matrix Market (MM) format.  The first coordinate is the
+user id; the second coordinate is the item id; and the third coordinate is the
+rating.  So, for instance, a dataset with 3 users and 2 items, and ratings
+between 1 and 5, might look like the following:
+
+```sh
+$ cat dataset.csv
+0, 1, 4
+1, 0, 5
+1, 1, 1
+2, 0, 2
+```
+
+This dataset has four ratings: user 0 has rated item 1 with a rating of 4; user
+1 has rated item 0 with a rating of 5; user 1 has rated item 1 with a rating of
+1; and user 2 has rated item 0 with a rating of 2.  Note that the user and item
+indices start from 0, and the identifiers must be numeric indices, and not
+names.
+
+The file does not necessarily need to be a CSV; it can be any supported storage
+format, assuming that it is a coordinate-format file in the format specified
+above.  For more information on mlpack file formats, see the documentation for
+`mlpack::data::Load()`.
+
+### `mlpack_cf` with default parameters
+
+In this example, we have a dataset from MovieLens, and we want to use
+`mlpack_cf` with the default parameters, which will provide 5 recommendations
+for each user, and we wish to save the results in the file
+`recommendations.csv`.  Assuming that our dataset is in the file
+`MovieLens-100k.csv` and it is in the correct format, we may use the `mlpack_cf`
+executable as below:
+
+```sh
+$ mlpack_cf -t MovieLens-100k.csv -v -o recommendations.csv
+```
+
+The `-v` option provides verbose output, and may be omitted if desired.  Now,
+for each user, we have recommendations in `recommendations.csv`:
+
+```sh
+$ head recommendations.csv
+317,422,482,356,495
+116,120,180,6,327
+312,49,116,99,236
+312,116,99,236,285
+55,190,317,194,63
+171,209,180,175,95
+208,0,94,87,57
+99,97,0,203,172
+257,99,180,287,0
+171,203,172,209,88
+```
+
+So, for user 0, the top 5 recommended items that user 0 has not rated are items
+317, 422, 482, 356, and 495.  For user 5, the recommendations are on the sixth
+line: 171, 209, 180, 175, 95.
+
+The `mlpack_cf` program can be built into a larger recommendation framework,
+with a preprocessing step that can turn user information and item information
+into numeric IDs, and a postprocessing step that can map these numeric IDs back
+to the original information.
+
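+As a sketch of such a preprocessing step (the file name `raw_ratings.csv` and
+its `user,item,rating` column layout are assumptions for illustration), string
+identifiers can be converted to numeric indices with a small `awk` script:
+
+```sh
+# Assign each distinct user and item string an index in order of appearance,
+# writing a numeric coordinate list suitable for mlpack_cf.
+$ awk -F, '{ if (!($1 in u)) u[$1] = nu++;
+             if (!($2 in i)) i[$2] = ni++;
+             print u[$1] "," i[$2] "," $3 }' raw_ratings.csv > dataset.csv
+```
+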
+### Saving `mlpack_cf` models
+
+The `mlpack_cf` program is able to save a particular model for later loading.
+Saving a model can be done with the `--output_model_file` or `-M` option.  The
+example below builds a CF model on the `MovieLens-100k.csv` dataset, and then
+saves the model to the file `cf-model.xml` for later usage.
+
+```sh
+$ mlpack_cf -t MovieLens-100k.csv -M cf-model.xml -v
+```
+
+The models can also be saved as `.bin` or `.txt`; the `.xml` format provides
+a human-inspectable format (though the models tend to be quite complex and may
+be difficult to read).  These models can then be re-used to provide specific
+recommendations for certain users, or other tasks.
+
+### Loading `mlpack_cf` models
+
+Instead of training a model, the `mlpack_cf` program can also load a model to
+provide recommendations, using the `--input_model_file` or `-m` option.  For
+instance, the example below will load the model from `cf-model.xml` and then
+generate recommendations for each user in the dataset (5 per user, by
+default), saving the results to `recommendations.csv`.
+
+```sh
+$ mlpack_cf -m cf-model.xml -v -o recommendations.csv
+```
+
+### Specifying rank of `mlpack_cf` decomposition
+
+By default, the matrix factorizations in the `mlpack_cf` program decompose the
+data matrix into two matrices `W` and `H` with rank two.  Often, this
+default parameter is not correct, and it makes sense to use a higher-rank
+decomposition.  The rank can be specified with the `--rank` or `-R` parameter:
+
+```sh
+$ mlpack_cf -t MovieLens-100k.csv -R 10 -v
+```
+
+In the example above, the data matrix will be decomposed into two matrices of
+rank 10.  In general, higher-rank decompositions will take longer to compute,
+but can give more accurate predictions.
+
+### `mlpack_cf` with single-user recommendation
+
+In the previous two examples, the output file `recommendations.csv` contains
+one line for each user in the input dataset.  But often, recommendations may
+only be desired for a few users.  In that case, we can assemble a file of query
+users, with one user per line:
+
+```sh
+$ cat query.csv
+0
+17
+31
+```
+
+Now, if we run the `mlpack_cf` executable with this query file, we will obtain
+recommendations for users 0, 17, and 31:
+
+```sh
+$ mlpack_cf -t MovieLens-100k.csv -R 10 -q query.csv -o recommendations.csv
+$ cat recommendations.csv
+474,356,317,432,473
+510,172,204,483,182
+0,120,236,257,126
+```
+
+### `mlpack_cf` with non-default factorizer
+
+The `--algorithm` (or `-a`) parameter controls the factorizer that is used.
+Several options are available:
+
+ - `NMF`: non-negative matrix factorization; see `AMF`
+ - `SVDBatch`: SVD batch factorization
+ - `SVDIncompleteIncremental`: incomplete incremental SVD
+ - `SVDCompleteIncremental`: complete incremental SVD
+ - `RegSVD`: regularized SVD; see `RegularizedSVD`
+
+The default factorizer is `NMF`.  The example below uses the `RegSVD`
+factorizer:
+
+```sh
+$ mlpack_cf -t MovieLens-100k.csv -R 10 -q query.csv -a RegSVD -o recommendations.csv
+```
+
+### `mlpack_cf` with non-default neighborhood size
+
+The `mlpack_cf` program produces recommendations using a neighborhood: similar
+users in the query user's neighborhood will be averaged to produce predictions.
+The size of this neighborhood is controlled with the `--neighborhood` (or `-n`)
+option.  An example using a neighborhood with 10 similar users is below:
+
+```sh
+$ mlpack_cf -t MovieLens-100k.csv -R 10 -q query.csv -a RegSVD -n 10
+```
+
+## The `CF` class
+
+The `CF` class in mlpack offers a simple, flexible API for performing
+collaborative filtering for recommender systems within C++ applications.  In the
+constructor, the `CF` class takes a coordinate-list dataset and decomposes the
+matrix according to the specified `FactorizerType` template parameter.
+
+Then, the `GetRecommendations()` function may be called to obtain
+recommendations for certain users (or all users), and the `W()` and `H()`
+matrices may be accessed to perform other computations.
+
+The data which the `CF` constructor takes should be an Armadillo matrix
+(`arma::mat`) with three rows.  The first row corresponds to users; the second
+row corresponds to items; and the third row corresponds to the rating.  This is
+coordinate list format, like the format the `mlpack_cf` executable takes.  The
+`data::Load()` function can be used to load data.
+
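+As a brief sketch, loading such a coordinate list might look like the following
+(the file name `dataset.csv` is an assumption; `data::Load()` loads CSV files
+transposed, so the three columns of the file become the three rows of the
+matrix):
+
+```c++
+#include <mlpack.hpp>
+
+// Load the coordinate list; 'data' will have three rows after loading.
+arma::mat data;
+mlpack::data::Load("dataset.csv", data, true /* fatal if loading fails */);
+```
+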
+The following examples detail a few ways that the `CF` class can be used.
+
+### `CF` with default parameters
+
+This example constructs the `CF` object with default parameters and obtains
+recommendations for each user, storing the output in the `recommendations`
+matrix.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The coordinate list of ratings that we have.
+extern arma::mat data;
+// The size of the neighborhood to use to get recommendations.
+extern size_t neighborhood;
+// The rank of the decomposition.
+extern size_t rank;
+
+// Build the CF object and perform the decomposition.
+// The constructor takes a default-constructed factorizer, which, by default,
+// is of type NMFALSFactorizer.
+CF cf(data, NMFALSFactorizer(), neighborhood, rank);
+
+// Store the results in this object.
+arma::Mat<size_t> recommendations;
+
+// Get 5 recommendations for all users.
+cf.GetRecommendations(5, recommendations);
+```
+
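+To get recommendations for only a few specific users, a sketch like the
+following can be used (the three-argument overload of `GetRecommendations()`
+shown here is an assumption; check the `CF` class source for the exact
+signature):
+
+```c++
+// Get 5 recommendations for users 0, 17, and 31 only.
+arma::Col<size_t> users("0 17 31");
+cf.GetRecommendations(5, recommendations, users);
+```
+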
+### `CF` with other factorizers
+
+mlpack provides a number of existing factorizers which can be used in place of
+the default `NMFALSFactorizer` (which is non-negative matrix factorization with
+alternating least squares update rules).  These include:
+
+ - `SVDBatchFactorizer`
+ - `SVDCompleteIncrementalFactorizer`
+ - `SVDIncompleteIncrementalFactorizer`
+ - `NMFALSFactorizer`
+ - `RegularizedSVD`
+ - `QUIC_SVD`
+
+The `AMF` class offers many more possibilities than those listed here; it is a
+framework for alternating matrix factorization techniques.  See the `AMF` class
+documentation or [tutorial on AMF](amf.md) for more information.
+
+The use of another factorizer is straightforward; the example from the previous
+section is adapted below to use `RegularizedSVD`:
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The coordinate list of ratings that we have.
+extern arma::mat data;
+// The size of the neighborhood to use to get recommendations.
+extern size_t neighborhood;
+// The rank of the decomposition.
+extern size_t rank;
+
+// Build the CF object and perform the decomposition.
+CF cf(data, RegularizedSVD(), neighborhood, rank);
+
+// Store the results in this object.
+arma::Mat<size_t> recommendations;
+
+// Get 5 recommendations for all users.
+cf.GetRecommendations(5, recommendations);
+```
+
+### Predicting individual user/item ratings
+
+The `Predict()` method can be used to predict the rating of an item by a certain
+user, using the same neighborhood-based approach as the `GetRecommendations()`
+function or the `mlpack_cf` executable.  Below is an example of the use of that
+function.
+
+The example below will obtain the predicted rating for item 50 by user 12.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The coordinate list of ratings that we have.
+extern arma::mat data;
+// The size of the neighborhood to use to get recommendations.
+extern size_t neighborhood;
+// The rank of the decomposition.
+extern size_t rank;
+
+// Build the CF object and perform the decomposition.
+// The constructor takes a default-constructed factorizer, which, by default,
+// is of type NMFALSFactorizer.
+CF cf(data, NMFALSFactorizer(), neighborhood, rank);
+
+const double prediction = cf.Predict(12, 50); // User 12, item 50.
+```
+
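+To evaluate predictions over many user/item pairs, a sketch like the following
+(assuming a held-out set `testData` in the same three-row coordinate format)
+computes the root mean squared error:
+
+```c++
+#include <cmath>
+
+// Hypothetical held-out ratings, in the same three-row coordinate format.
+extern arma::mat testData;
+
+double squaredError = 0.0;
+for (size_t i = 0; i < testData.n_cols; ++i)
+{
+  const size_t user = (size_t) testData(0, i);
+  const size_t item = (size_t) testData(1, i);
+  const double error = cf.Predict(user, item) - testData(2, i);
+  squaredError += error * error;
+}
+const double rmse = std::sqrt(squaredError / testData.n_cols);
+```
+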
+### Other operations with the `W` and `H` matrices
+
+Sometimes, the raw decomposed `W` and `H` matrices can be useful.  The example
+below obtains these matrices, and multiplies them against each other to obtain a
+reconstructed data matrix with no missing values.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The coordinate list of ratings that we have.
+extern arma::mat data;
+// The size of the neighborhood to use to get recommendations.
+extern size_t neighborhood;
+// The rank of the decomposition.
+extern size_t rank;
+
+// Build the CF object and perform the decomposition.
+// The constructor takes a default-constructed factorizer, which, by default,
+// is of type NMFALSFactorizer.
+CF cf(data, NMFALSFactorizer(), neighborhood, rank);
+
+// References to W and H matrices.
+const arma::mat& W = cf.W();
+const arma::mat& H = cf.H();
+
+// Multiply the matrices together.
+arma::mat reconstructed = W * H;
+```
+
+## Template parameters for the `CF` class
+
+The `CF` class takes the `FactorizerType` as a template parameter to some of
+its constructors and to the `Train()` function.  The `FactorizerType` class
+defines the algorithm used for matrix factorization.  There are a number of
+existing factorizers that can be used in mlpack; these were detailed in the
+'other factorizers' example of the previous section.
+
+The `FactorizerType` class must implement one of the two following methods:
+
+ - `Apply(arma::mat& data, const size_t rank, arma::mat& W, arma::mat& H);`
+ - `Apply(arma::sp_mat& data, const size_t rank, arma::mat& W, arma::mat& H);`
+
+The difference between these two methods is whether `arma::mat` or
+`arma::sp_mat` is used as input.  If `arma::mat` is used, then the data matrix
+is a coordinate list with three columns, as in the constructor to the `CF`
+class.  If `arma::sp_mat` is used, then a sparse matrix is passed with the
+number of rows equal to the number of items and the number of columns equal to
+the number of users, and each nonzero element in the matrix corresponds to a
+non-missing rating.
+
+The method that the factorizer implements is specified via the
+`FactorizerTraits` class, which is a template metaprogramming traits class:
+
+```c++
+template<typename FactorizerType>
+struct FactorizerTraits
+{
+  /**
+   * If true, then the passed data matrix is used for factorizer.Apply().
+   * Otherwise, it is modified into a form suitable for factorization.
+   */
+  static const bool UsesCoordinateList = false;
+};
+```
+
+If `FactorizerTraits<MyFactorizer>::UsesCoordinateList` is `true`, then `CF`
+will try to call `Apply()` with an `arma::mat` object.  Otherwise, `CF` will try
+to call `Apply()` with an `arma::sp_mat` object.  Specifying the value of
+`UsesCoordinateList` is straightforward; provide this specialization of the
+`FactorizerTraits` class:
+
+```c++
+template<>
+struct FactorizerTraits<MyFactorizer>
+{
+  static const bool UsesCoordinateList = true; // Set your value here.
+};
+```
+
+The `Apply()` function also takes a reference to the matrices `W` and `H`.
+When the `Apply()` function returns, the input data matrix should be decomposed
+into these two matrices.  `W` should have number of rows equal to the number of
+items and number of columns equal to the `rank` parameter, and `H` should have
+number of rows equal to the `rank` parameter, and number of columns equal to
+the number of users.
+
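+Putting these requirements together, a minimal skeleton of a custom factorizer
+might look like the following sketch (the name `MyFactorizer` and the random
+initialization are assumptions for illustration, not an implementation from
+mlpack):
+
+```c++
+class MyFactorizer
+{
+ public:
+  // CF calls this overload with a sparse (items x users) matrix, since
+  // FactorizerTraits<MyFactorizer>::UsesCoordinateList is false by default.
+  void Apply(arma::sp_mat& data,
+             const size_t rank,
+             arma::mat& W,
+             arma::mat& H)
+  {
+    W.randu(data.n_rows, rank); // (number of items) x rank.
+    H.randu(rank, data.n_cols); // rank x (number of users).
+    // ... iteratively update W and H so that W * H approximates data ...
+  }
+};
+```
+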
+The `AMF` class can be used as a base for factorizers that alternate between
+updating `W` and updating `H`.  A useful reference is the [AMF
+tutorial](amf.md).
+
+## Further documentation
+
+Further documentation for the `CF` class may be found in the comments in the
+source code of the files in `src/mlpack/methods/cf/`.  In addition, more
+information on the `AMF` class of factorizers may be found in the sources for
+`AMF`, in `src/mlpack/methods/amf/`.
diff -pruN 3.4.2-7/doc/tutorials/datasetmapper.md 4.0.1-1/doc/tutorials/datasetmapper.md
--- 3.4.2-7/doc/tutorials/datasetmapper.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/datasetmapper.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,186 @@
+# DatasetMapper tutorial
+
+`DatasetMapper` is a class which holds information about a dataset. This can be
+used when a dataset contains categorical non-numeric features which should be
+mapped to numeric features. A simple example:
+
+```
+7,5,True,3
+6,3,False,4
+4,8,False,2
+9,3,True,3
+```
+
+The above dataset will be represented as
+
+```
+7,5,0,3
+6,3,1,4
+4,8,1,2
+9,3,0,3
+```
+
+Here the mappings are
+
+- `True` mapped to `0`
+- `False` mapped to `1`
+
+**Note**: `DatasetMapper` maps non-numeric values in the order in which it
+encounters them in the dataset.  Here, `True` is mapped to `0` because it is
+encountered before `False`.  These `0` and `1` values are not to be confused
+with C++ `bool` notation; they are simply mappings created by
+`mlpack::DatasetMapper`.
+
+`DatasetMapper` provides an easy API to load such data and stores all the
+necessary information of the dataset.
+
+## Loading data
+
+To use `DatasetMapper` we have to call a specific overload of the `data::Load()`
+function.
+
+```c++
+using namespace mlpack;
+
+arma::mat data;
+data::DatasetMapper info;
+data::Load("dataset.csv", data, info);
+```
+
+Here, the file `dataset.csv` contains:
+
+```
+7, 5, True, 3
+6, 3, False, 4
+4, 8, False, 2
+9, 3, True, 3
+```
+
+## Dimensionality
+
+There are two ways to initialize a `DatasetMapper` object.
+
+* The first is to initialize the object and set each property yourself (a
+  sketch of this is shown at the end of this section).
+
+* The second is to pass the object to `Load()` without initialization, and
+  mlpack will populate the object. If we use the latter option, then the
+  dimensionality will be the same as that of the data file.
+
+```c++
+std::cout << info.Dimensionality();
+```
+
+```
+4
+```
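+
+For the first option, a minimal sketch might look like this (the non-const
+`Type()` accessor used to set the type is an assumption; check your mlpack
+version):
+
+```c++
+// Manually declare a mapper for four dimensions, marking the third dimension
+// (index 2) as categorical.
+data::DatasetMapper info(4);
+info.Type(2) = data::Datatype::categorical;
+```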
+
+## Type of each dimension
+
+Each dimension can be of either of the two types:
+
+  - `data::Datatype::numeric`
+  - `data::Datatype::categorical`
+
+The function `Type(size_t dimension)` takes an argument `dimension`, which is
+the row number for which you want to know the type.
+
+It returns an enum `data::Datatype`, which is cast to `size_t` when we print it
+using `std::cout`.
+
+  - `0` represents `data::Datatype::numeric`
+  - `1` represents `data::Datatype::categorical`
+
+```c++
+std::cout << info.Type(0) << "\n";
+std::cout << info.Type(1) << "\n";
+std::cout << info.Type(2) << "\n";
+std::cout << info.Type(3) << "\n";
+```
+
+This produces:
+
+```
+0
+0
+1
+0
+```
+
+## Number of mappings
+
+If the type of a dimension is `data::Datatype::categorical`, then during
+loading, each unique token in that dimension will be mapped to an integer
+starting with `0`.
+
+`NumMappings(size_t dimension)` takes `dimension` as an argument and returns the
+number of mappings in that dimension.  If the dimension is numeric, or there are
+no mappings, then it will return 0.
+
+```c++
+std::cout << info.NumMappings(0) << "\n";
+std::cout << info.NumMappings(1) << "\n";
+std::cout << info.NumMappings(2) << "\n";
+std::cout << info.NumMappings(3) << "\n";
+```
+
+will print:
+
+```
+0
+0
+2
+0
+```
+
+## Checking mappings
+
+There are two ways to check the mappings.
+
+  - Enter the string to get the mapped integer
+  - Enter the mapped integer to get the original string
+
+### `UnmapString()`
+
+The `UnmapString()` function has the full signature `UnmapString(int value,
+size_t dimension, size_t unmappingIndex = 0UL)`.
+
+  - `value` is the mapped integer for which you want to find the original string
+  - `dimension` is the dimension in which you want to check the mappings
+
+```c++
+std::cout << info.UnmapString(0, 2) << "\n";
+std::cout << info.UnmapString(1, 2) << "\n";
+```
+
+This will print:
+
+```
+True
+False
+```
+
+### `UnmapValue()`
+
+The `UnmapValue()` function has the signature `UnmapValue(const std::string
+&input, size_t dimension)`.
+
+  - `input` is the string for which you want to find the mapped integer
+  - `dimension` is the dimension in which you want to find the mapped value
+
+```c++
+std::cout << info.UnmapValue("T", 2) << "\n";
+std::cout << info.UnmapValue("F", 2) << "\n";
+```
+
+will produce:
+
+```
+0
+1
+```
+
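+As a quick consistency check, mapping and unmapping should round-trip (a brief
+sketch, reusing the `info` object from above):
+
+```c++
+// Map the string to its integer, then unmap the integer back to the string.
+const size_t mapped = (size_t) info.UnmapValue("True", 2);
+std::cout << info.UnmapString(mapped, 2) << "\n"; // Prints "True" again.
+```
+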
+## Further documentation
+
+For further documentation on `DatasetMapper` and its uses, see the comments in
+the source code in `src/mlpack/core/data/`, as well as its uses in the [examples
+repository](https://github.com/mlpack/examples).
diff -pruN 3.4.2-7/doc/tutorials/det/det.txt 4.0.1-1/doc/tutorials/det/det.txt
--- 3.4.2-7/doc/tutorials/det/det.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/det/det.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,374 +0,0 @@
-/*!
-
-@file det.txt
-@author Parikshit Ram
-@brief Tutorial for how to perform density estimation with Density Estimation Trees (DET).
-
-@page dettutorial Density Estimation Tree (DET) tutorial
-
-@section intro_det_tut Introduction
-
-DETs perform the unsupervised task of density estimation using decision trees.
-Using a trained density estimation tree (DET), the density at any particular
-point can be estimated very quickly (O(log n) time, where n is the number of
-points the tree is built on).
-
-The details of this work is presented in the following paper:
-@code
-@inproceedings{ram2011density,
-  title={Density estimation trees},
-  author={Ram, P. and Gray, A.G.},
-  booktitle={Proceedings of the 17th ACM SIGKDD International Conference on
-      Knowledge Discovery and Data Mining},
-  pages={627--635},
-  year={2011},
-  organization={ACM}
-}
-@endcode
-
-\b mlpack provides:
-
- - a \ref cli_det_tut "simple command-line executable" to perform density estimation and related analyses using DETs
- - a \ref dtree_det_tut "generic C++ class (DTree)" which provides various functionality for the DETs
- - a set of functions in the namespace \ref dtutils_det_tut "mlpack::det" to perform cross-validation for the task of density estimation with DETs
-
-@section toc_det_tut Table of Contents
-
-A list of all the sections this tutorial contains.
-
- - \ref intro_det_tut
- - \ref toc_det_tut
- - \ref cli_det_tut
-   - \ref cli_ex1_de_tut
-   - \ref cli_ex2_de_test_tut
-   - \ref cli_ex4_de_vi_tut
-   - \ref cli_ex6_de_save
-   - \ref cli_ex7_de_load
- - \ref dtree_det_tut
-   - \ref dtree_pub_func_det_tut
- - \ref dtutils_det_tut
-   - \ref dtutils_util_funcs
- - \ref further_doc_det_tut
-
-@section cli_det_tut Command-Line mlpack_det
-The command line arguments of this program can be viewed using the \c -h option:
-
-@code
-$ mlpack_det -h
-Density Estimation With Density Estimation Trees
-
-  This program performs a number of functions related to Density Estimation
-  Trees.  The optimal Density Estimation Tree (DET) can be trained on a set of
-  data (specified by --training_file or -t) using cross-validation (with number
-  of folds specified by --folds).  This trained density estimation tree may then
-  be saved to a model file with the --output_model_file (-M) option.
-
-  The variable importances of each dimension may be saved with the --vi_file
-  (-i) option, and the density estimates on each training point may be saved to
-  the file specified with the --training_set_estimates_file (-e) option.
-
-  This program also can provide density estimates for a set of test points,
-  specified in the --test_file (-T) file.  The density estimation tree used for
-  this task will be the tree that was trained on the given training points, or a
-  tree stored in the file given with the --input_model_file (-m) parameter.  The
-  density estimates for the test points may be saved into the file specified
-  with the --test_set_estimates_file (-E) option.
-
-
-Options:
-
-  --folds (-f) [int]            The number of folds of cross-validation to
-                                perform for the estimation (0 is LOOCV)  Default
-                                value 10.
-  --help (-h)                   Default help info.
-  --info [string]               Get help on a specific module or option.
-                                Default value ''.
-  --input_model_file (-m) [string]
-                                File containing already trained density
-                                estimation tree.  Default value ''.
-  --max_leaf_size (-L) [int]    The maximum size of a leaf in the unpruned,
-                                fully grown DET.  Default value 10.
-  --min_leaf_size (-l) [int]    The minimum size of a leaf in the unpruned,
-                                fully grown DET.  Default value 5.
-  --output_model_file (-M) [string]
-                                File to save trained density estimation tree to.
-                                 Default value ''.
-  --test_file (-T) [string]     A set of test points to estimate the density of.
-                                 Default value ''.
-  --test_set_estimates_file (-E) [string]
-                                The file in which to output the estimates on the
-                                test set from the final optimally pruned tree.
-                                Default value ''.
-  --training_file (-t) [string]
-                                The data set on which to build a density
-                                estimation tree.  Default value ''.
-  --training_set_estimates_file (-e) [string]
-                                The file in which to output the density
-                                estimates on the training set from the final
-                                optimally pruned tree.  Default value ''.
-  --verbose (-v)                Display informational messages and the full list
-                                of parameters and timers at the end of
-                                execution.
-  --version (-V)                Display the version of mlpack.
-  --vi_file (-i) [string]       The file to output the variable importance
-                                values for each feature.  Default value ''.
-
-For further information, including relevant papers, citations, and theory,
-consult the documentation found at http://www.mlpack.org or included with your
-distribution of mlpack.
-@endcode
-
-@subsection cli_ex1_de_tut Plain-vanilla density estimation
-
-We can just train a DET on the provided data set \e S.  Like all datasets
-\b mlpack uses, the data should be row-major (\b mlpack transposes data when it
-is loaded; internally, the data is column-major -- see \ref matrices "this page"
-for more information).
-
-@code
-$ mlpack_det -t dataset.csv -v
-@endcode
-
-By default, \c mlpack_det performs 10-fold cross-validation (using the
-\f$\alpha\f$-pruning regularization for decision trees). To perform LOOCV
-(leave-one-out cross-validation), which can provide better results but will take
-longer, use the following command:
-
-@code
-$ mlpack_det -t dataset.csv -f 0 -v
-@endcode
-
-To perform k-fold crossvalidation, use \c -f \c k (or \c --folds \c k). There
-are certain other options available for training. For example, in the
-construction of the initial tree, you can specify the maximum and minimum leaf
-sizes. By default, they are 10 and 5 respectively; you can set them using the \c
--M (\c --max_leaf_size) and the \c -N (\c --min_leaf_size) options.
-
-@code
-$ mlpack_det -t dataset.csv -M 20 -N 10
-@endcode
-
-In case you want to output the density estimates at the points in the training
-set, use the \c -e (\c --training_set_estimates_file) option to specify the
-output file to which the estimates will be saved.  The first line in
-density_estimates.txt will correspond to the density at the first point in the
-training set.  Note that the logarithm of the density estimates are given, which
-allows smaller estimates to be saved.
-
-@code
-$ mlpack_det -t dataset.csv -e density_estimates.txt -v
-@endcode
-
-@subsection cli_ex2_de_test_tut Estimation on a test set
-
-Often, it is useful to train a density estimation tree on a training set and
-then obtain density estimates from the learned estimator for a separate set of
-test points.  The \c -T (\c --test_file) option allows specification of a set of
-test points, and the \c -E (\c --test_set_estimates_file) option allows
-specification of the file into which the test set estimates are saved.  Note
-that the logarithm of the density estimates are saved; this allows smaller
-values to be saved.
-
-@code
-$ mlpack_det -t dataset.csv -T test_points.csv -E test_density_estimates.txt -v
-@endcode
-
-@subsection cli_ex4_de_vi_tut Computing the variable importance
-
-The variable importance (with respect to density estimation) of the different
-features in the data set can be obtained by using the \c -i (\c --vi_file )
-option. This outputs the absolute (as opposed to relative) variable importance
-of the all the features into the specified file.
-
-@code
-$ mlpack_det -t dataset.csv -i variable_importance.txt -v
-@endcode
-
-@subsection cli_ex6_de_save Saving trained DETs
-
-The \c mlpack_det program is capable of saving a trained DET to a file for later
-usage.  The \c --output_model_file or \c -M option allows specification of the
-file to save to.  In the example below, a DET trained on \c dataset.csv is saved
-to the file \c det.xml.
-
-@code
-$ mlpack_det -t dataset.csv -M det.xml -v
-@endcode
-
-@subsection cli_ex7_de_load Loading trained DETs
-
-A saved DET can be used to perform any of the functionality in the examples
-above.  A saved DET is loaded with the \c --input_model_file or \c -m option.
-The example below loads a saved DET from \c det.xml and outputs density
-estimates on the dataset \c test_dataset.csv into the file \c estimates.csv.
-
-@code
-$ mlpack_det -m det.xml -T test_dataset.csv -E estimates.csv -v
-@endcode
-
-@section dtree_det_tut The 'DTree' class
-
-This class implements density estimation trees.  Below is a simple example which
-initializes a density estimation tree.
-
-@code
-#include <mlpack/methods/det/dtree.hpp>
-
-using namespace mlpack::det;
-
-// The dataset matrix, on which to learn the density estimation tree.
-extern arma::Mat<float> data;
-
-// Initialize the tree.  This function also creates and saves the bounding box
-// of the data.  Note that it does not actually build the tree.
-DTree<> det(data);
-@endcode
-
-@subsection dtree_pub_func_det_tut Public Functions
-
-The function \c Grow() greedily grows the tree, adding new points to the tree.
-Note that the points in the dataset will be reordered.  This should only be run
-on a tree which has not already been built.  In general, it is more useful to
-use the \c Trainer() function found in \ref dtutils_det_tut.
-
-@code
-// This keeps track of the data during the shuffle that occurs while growing the
-// tree.
-arma::Col<size_t> oldFromNew(data.n_cols);
-for (size_t i = 0; i < data.n_cols; i++)
-  oldFromNew[i] = i;
-
-// This function grows the tree down to the leaves. It returns the current
-// minimum value of the regularization parameter alpha.
-size_t maxLeafSize = 10;
-size_t minLeafSize = 5;
-
-double alpha = det.Grow(data, oldFromNew, false, maxLeafSize, minLeafSize);
-@endcode
-
-Note that the alternate volume regularization should not be used (see ticket
-#238).
-
-To estimate the density at a given query point, use the following code.  Note
-that the logarithm of the density is returned.
-
-@code
-// For a given query, you can obtain the density estimate.
-extern arma::Col<float> query;
-extern DTree* det;
-double estimate = det->ComputeValue(&query);
-@endcode
-
-Computing the \b variable \b importance of each feature for the given DET.
-
-@code
-// The data matrix and density estimation tree.
-extern arma::mat data;
-extern DTree* det;
-
-// The variable importances will be saved into this vector.
-arma::Col<double> varImps;
-
-// You can obtain the variable importance from the current tree.
-det->ComputeVariableImportance(varImps);
-@endcode
-
-@section dtutils_det_tut 'namespace mlpack::det'
-
-The functions in this namespace allows the user to perform tasks with the
-'DTree' class.  Most importantly, the \c Trainer() method allows the full
-training of a density estimation tree with cross-validation.  There are also
-utility functions which allow printing of leaf membership and variable
-importance.
-
-@subsection dtutils_util_funcs Utility Functions
-
-The code below details how to train a density estimation tree with
-cross-validation.
-
-@code
-#include <mlpack/methods/det/dt_utils.hpp>
-
-using namespace mlpack::det;
-
-// The dataset matrix, on which to learn the density estimation tree.
-extern arma::Mat<float> data;
-
-// The number of folds for cross-validation.
-const size_t folds = 10; // Set folds = 0 for LOOCV.
-
-const size_t maxLeafSize = 10;
-const size_t minLeafSize = 5;
-
-// Train the density estimation tree with cross-validation.
-DTree<>* dtree_opt = Trainer(data, folds, false, maxLeafSize, minLeafSize);
-@endcode
-
-Note that the alternate volume regularization should be set to false because it
-has known bugs (see #238).
-
-To print the class membership of leaves in the tree into a file, see the
-following code.
-
-@code
-extern arma::Mat<size_t> labels;
-extern DTree* det;
-const size_t numClasses = 3; // The number of classes must be known.
-
-extern string leafClassMembershipFile;
-
-PrintLeafMembership(det, data, labels, numClasses, leafClassMembershipFile);
-@endcode
-
-Note that you can find the number of classes with \c max(labels) \c + \c 1.
-The variable importance can also be printed to a file in a similar manner.
-
-@code
-extern DTree* det;
-
-extern string variableImportanceFile;
-const size_t numFeatures = data.n_rows;
-
-PrintVariableImportance(det, numFeatures, variableImportanceFile);
-@endcode
-
-@section further_doc_det_tut Further Documentation
-For further documentation on the DTree class, consult the
-\ref mlpack::det::DTree "complete API documentation".
-
-*/
-
------ this option is not available in DET right now; see #238! -----
-@subsection cli_alt_reg_tut Alternate DET regularization
-
-The usual regularized error \f$R_\alpha(t)\f$ of a node \f$t\f$ is given by:
-\f$R_\alpha(t) = R(t) + \alpha |\tilde{t}|\f$ where
-
-\f{
-R(t) = -\frac{|t|^2}{N^2 V(t)}.
-\f}
-
-\f$V(t)\f$ is the volume of the node \f$t\f$ and \f$\tilde{t}\f$ is
-the set of leaves in the subtree rooted at \f$t\f$.
-
-For the purposes of density estimation, there is a different form of
-regularization: instead of penalizing the number of leaves in the subtree, we
-penalize the sum of the inverse of the volumes of the leaves.  With this
-regularization, very small volume nodes are discouraged unless the data actually
-warrants it. Thus,
-
-\f[
-R_\alpha'(t) = R(t) + \alpha I_v(\tilde{t})
-\f]
-
-where
-
-\f[
-I_v(\tilde{t}) = \sum_{l \in \tilde{t}} \frac{1}{V(l)}.
-\f]
-
-To use this form of regularization, use the \c -R flag.
-
-@code
-$ mlpack_det -t dataset.csv -R -v
-@endcode
diff -pruN 3.4.2-7/doc/tutorials/det.md 4.0.1-1/doc/tutorials/det.md
--- 3.4.2-7/doc/tutorials/det.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/det.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,322 @@
+# Density estimation tree (DET) tutorial
+
+DETs perform the unsupervised task of density estimation using decision trees.
+Using a trained density estimation tree (DET), the density at any particular
+point can be estimated very quickly (`O(log n)` time, where `n` is the number of
+points the tree is built on).
+
+The details of this work are presented in the following paper:
+
+```
+@inproceedings{ram2011density,
+  title={Density estimation trees},
+  author={Ram, P. and Gray, A.G.},
+  booktitle={Proceedings of the 17th ACM SIGKDD International Conference on
+      Knowledge Discovery and Data Mining},
+  pages={627--635},
+  year={2011},
+  organization={ACM}
+}
+```
+
+mlpack provides:
+
+ - a simple command-line executable to perform density estimation and related
+   analyses using DETs
+ - a generic C++ class (`DTree`) which provides various functionality for the
+   DETs
+ - a set of functions in the namespace `mlpack::det` to perform cross-validation
+   for the task of density estimation with DETs
+
+## Command-line `mlpack_det`
+
+*(Note: this section was written for the command-line program `mlpack_det`, but
+a `det()` function is available for other languages via mlpack's bindings
+system.  The options are so similar that it is easy to adapt the examples here
+to another language.)*
+
+The command line arguments of this program can be viewed using the `-h` option:
+
+```sh
+$ mlpack_det -h
+Density Estimation With Density Estimation Trees
+
+  This program performs a number of functions related to Density Estimation
+  Trees.  The optimal Density Estimation Tree (DET) can be trained on a set of
+  data (specified by --training_file or -t) using cross-validation (with number
+  of folds specified by --folds).  This trained density estimation tree may then
+  be saved to a model file with the --output_model_file (-M) option.
+
+  The variable importances of each dimension may be saved with the --vi_file
+  (-i) option, and the density estimates on each training point may be saved to
+  the file specified with the --training_set_estimates_file (-e) option.
+
+  This program also can provide density estimates for a set of test points,
+  specified in the --test_file (-T) file.  The density estimation tree used for
+  this task will be the tree that was trained on the given training points, or a
+  tree stored in the file given with the --input_model_file (-m) parameter.  The
+  density estimates for the test points may be saved into the file specified
+  with the --test_set_estimates_file (-E) option.
+
+
+Options:
+
+  --folds (-f) [int]            The number of folds of cross-validation to
+                                perform for the estimation (0 is LOOCV)  Default
+                                value 10.
+  --help (-h)                   Default help info.
+  --info [string]               Get help on a specific module or option.
+                                Default value ''.
+  --input_model_file (-m) [string]
+                                File containing already trained density
+                                estimation tree.  Default value ''.
+  --max_leaf_size (-L) [int]    The maximum size of a leaf in the unpruned,
+                                fully grown DET.  Default value 10.
+  --min_leaf_size (-l) [int]    The minimum size of a leaf in the unpruned,
+                                fully grown DET.  Default value 5.
+  --output_model_file (-M) [string]
+                                File to save trained density estimation tree to.
+                                 Default value ''.
+  --test_file (-T) [string]     A set of test points to estimate the density of.
+                                 Default value ''.
+  --test_set_estimates_file (-E) [string]
+                                The file in which to output the estimates on the
+                                test set from the final optimally pruned tree.
+                                Default value ''.
+  --training_file (-t) [string]
+                                The data set on which to build a density
+                                estimation tree.  Default value ''.
+  --training_set_estimates_file (-e) [string]
+                                The file in which to output the density
+                                estimates on the training set from the final
+                                optimally pruned tree.  Default value ''.
+  --verbose (-v)                Display informational messages and the full list
+                                of parameters and timers at the end of
+                                execution.
+  --version (-V)                Display the version of mlpack.
+  --vi_file (-i) [string]       The file to output the variable importance
+                                values for each feature.  Default value ''.
+
+For further information, including relevant papers, citations, and theory,
+consult the documentation found at http://www.mlpack.org or included with your
+distribution of mlpack.
+```
+
+### Plain-vanilla density estimation
+
+We can just train a DET on the provided data set `S`.  Like all datasets
+mlpack uses, the data should be row-major (mlpack transposes data when it
+is loaded; internally, the data is column-major---see [this
+page](../user/matrices.md) for more information).
+
+```sh
+$ mlpack_det -t dataset.csv -v
+```
+
+By default, `mlpack_det` performs 10-fold cross-validation (using the
+alpha-pruning regularization for decision trees). To perform LOOCV
+(leave-one-out cross-validation), which can provide better results but will take
+longer, use the following command:
+
+```sh
+$ mlpack_det -t dataset.csv -f 0 -v
+```
+
+To perform `k`-fold cross-validation, use `-f k` (or `--folds k`). There are
+certain other options available for training. For example, in the construction
+of the initial tree, you can specify the maximum and minimum leaf sizes. By
+default, they are 10 and 5 respectively; you can set them using the `-L`
+(`--max_leaf_size`) and the `-l` (`--min_leaf_size`) options.
+
+```sh
+$ mlpack_det -t dataset.csv -L 20 -l 10
+```
+
+In case you want to output the density estimates at the points in the training
+set, use the `-e` (`--training_set_estimates_file`) option to specify the output
+file to which the estimates will be saved.  The first line in
+`density_estimates.txt` will correspond to the density at the first point in the
+training set.  Note that the logarithms of the density estimates are given, which
+allows smaller estimates to be saved.
+
+```sh
+$ mlpack_det -t dataset.csv -e density_estimates.txt -v
+```
+
+### Estimation on a test set
+
+Often, it is useful to train a density estimation tree on a training set and
+then obtain density estimates from the learned estimator for a separate set of
+test points.  The `-T` (`--test_file`) option allows specification of a set of
+test points, and the `-E` (`--test_set_estimates_file`) option allows
+specification of the file into which the test set estimates are saved.  Note
+that the logarithms of the density estimates are saved; this allows smaller
+values to be saved.
+
+```sh
+$ mlpack_det -t dataset.csv -T test_points.csv -E test_density_estimates.txt -v
+```
+
+### Computing the variable importance
+
+The variable importance (with respect to density estimation) of the different
+features in the data set can be obtained by using the `-i` (`--vi_file`) option.
+This outputs the absolute (as opposed to relative) variable importance of all
+the features into the specified file.
+
+```sh
+$ mlpack_det -t dataset.csv -i variable_importance.txt -v
+```
+
+### Saving trained DETs
+
+The `mlpack_det` program is capable of saving a trained DET to a file for later
+usage.  The `--output_model_file` or `-M` option allows specification of the
+file to save to.  In the example below, a DET trained on `dataset.csv` is saved
+to the file `det.xml`.
+
+```sh
+$ mlpack_det -t dataset.csv -M det.xml -v
+```
+
+### Loading trained DETs
+
+A saved DET can be used to perform any of the functionality in the examples
+above.  A saved DET is loaded with the `--input_model_file` or `-m` option.  The
+example below loads a saved DET from `det.xml` and outputs density estimates on
+the dataset `test_dataset.csv` into the file `estimates.csv`.
+
+```sh
+$ mlpack_det -m det.xml -T test_dataset.csv -E estimates.csv -v
+```
+
+## The `DTree` class
+
+This class implements density estimation trees.  Below is a simple example which
+initializes a density estimation tree.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The dataset matrix, on which to learn the density estimation tree.
+extern arma::Mat<float> data;
+
+// Initialize the tree.  This constructor also creates and saves the bounding box
+// of the data.  Note that it does not actually build the tree.
+DTree<> det(data);
+```
+
+### Public functions
+
+The function `Grow()` greedily grows the tree on the given dataset.
+Note that the points in the dataset will be reordered.  This should only be run
+on a tree which has not already been built.  In general, it is more useful to
+use the `Trainer()` function, detailed later.
+
+```c++
+// This keeps track of the data during the shuffle that occurs while growing the
+// tree.
+arma::Col<size_t> oldFromNew(data.n_cols);
+for (size_t i = 0; i < data.n_cols; i++)
+  oldFromNew[i] = i;
+
+// This function grows the tree down to the leaves. It returns the current
+// minimum value of the regularization parameter alpha.
+size_t maxLeafSize = 10;
+size_t minLeafSize = 5;
+
+double alpha = det.Grow(data, oldFromNew, false, maxLeafSize, minLeafSize);
+```
+
+Note that the alternate volume regularization should not be used (see
+[#238](https://github.com/mlpack/mlpack/issues/238)).
+
+To estimate the density at a given query point, use the following code.  Note
+that the logarithm of the density is returned.
+
+```c++
+// For a given query, you can obtain the density estimate.
+extern arma::vec query;
+extern DTree<>* det;
+double estimate = det->ComputeValue(query);
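+// The density itself can be recovered as std::exp(estimate).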
+```
+
+The *variable importance* of each feature in the given DET can be computed as
+follows.
+
+```c++
+// The data matrix and density estimation tree.
+extern arma::mat data;
+extern DTree<>* det;
+
+// The variable importances will be saved into this vector.
+arma::Col<double> varImps;
+
+// You can obtain the variable importance from the current tree.
+det->ComputeVariableImportance(varImps);
+```
+
+## The `mlpack::det` namespace
+
+The functions in this namespace allow the user to perform tasks with the
+`DTree` class.  Most importantly, the `Trainer()` method allows the full
+training of a density estimation tree with cross-validation.  There are also
+utility functions which allow printing of leaf membership and variable
+importance.
+
+### Utility functions
+
+The code below details how to train a density estimation tree with
+cross-validation.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The dataset matrix, on which to learn the density estimation tree.
+extern arma::mat data;
+
+// The number of folds for cross-validation.
+const size_t folds = 10; // Set folds = 0 for LOOCV.
+
+const size_t maxLeafSize = 10;
+const size_t minLeafSize = 5;
+
+// Train the density estimation tree with cross-validation.
+DTree<>* dtree_opt = Trainer(data, folds, false, maxLeafSize, minLeafSize);
+```
+
+Note that the alternate volume regularization should be set to false because it
+has known bugs (see [#238](https://github.com/mlpack/mlpack/issues/238)).
+
+To print the class membership of leaves in the tree into a file, see the
+following code.
+
+```c++
+extern arma::mat data;
+extern arma::Mat<size_t> labels;
+extern DTree<>* det;
+const size_t numClasses = 3; // The number of classes must be known.
+
+extern std::string leafClassMembershipFile;
+
+PrintLeafMembership(det, data, labels, numClasses, leafClassMembershipFile);
+```
+
+Note that you can find the number of classes with `max(labels) + 1`.  The
+variable importance can also be printed to a file in a similar manner.
+
+```c++
+extern arma::mat data;
+extern DTree<>* det;
+
+extern std::string variableImportanceFile;
+const size_t numFeatures = data.n_rows;
+
+PrintVariableImportance(det, numFeatures, variableImportanceFile);
+```
+
+## Further documentation
+
+For further documentation on the `DTree` class, consult the comments in the
+source code, in `mlpack/methods/det/`.
diff -pruN 3.4.2-7/doc/tutorials/emst/emst.txt 4.0.1-1/doc/tutorials/emst/emst.txt
--- 3.4.2-7/doc/tutorials/emst/emst.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/emst/emst.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,148 +0,0 @@
-/*!
-
-@file emst.txt
-@author Bill March
-@brief Tutorial for the Euclidean Minimum Spanning Tree algorithm.
-
-@page emst_tutorial EMST Tutorial
-
-@section intro_emsttut Introduction
-
-The Euclidean Minimum Spanning Tree problem is widely used in machine learning
-and data mining applications.  Given a set \f$S\f$ of points in \f$\mathbf{R}^d\f$,
-our task is to compute lowest weight spanning tree in the complete graph on \f$S\f$
-with edge weights given by the Euclidean distance between points.
-
-Among other applications, the EMST can be used to compute hierarchical clusterings
-of data.  A <em>single-linkage clustering</em> can be obtained from the EMST by deleting
-all edges longer than a given cluster length.  This technique is also referred to as a <em>Friends-of-Friends</em> clustering in the astronomy literature.
-
-mlpack includes an implementation of <b>Dual-Tree Boruvka</b> which uses
-\f$kd\f$-trees by default; this is the empirically and theoretically fastest
-EMST algorithm.  In addition, the implementation supports the use of different
-trees via templates.  For more details, see the following paper:
-
-@code
-@inproceedings{march2010fast,
-  title={Fast {E}uclidean minimum spanning tree: algorithm, analysis, and
-applications},
-  author={March, William B. and Ram, Parikshit and Gray, Alexander G.},
-  booktitle={Proceedings of the 16th ACM SIGKDD International Conference on
-Knowledge Discovery and Data Mining (KDD '10)},
-  pages={603--612},
-  year={2010},
-  organization={ACM}
-}
-@endcode
-
-\b mlpack provides:
-
- - a \ref cli_emsttut "simple command-line executable" to compute the EMST of a given data set
- - a \ref dtb_emsttut "simple C++ interface" to compute the EMST
-
-@section toc_emsttut Table of Contents
-
-A list of all the sections this tutorial contains.
-
- - \ref intro_emsttut
- - \ref toc_emsttut
- - \ref cli_emsttut
- - \ref dtb_emsttut
- - \ref further_doc_emsttut
-
-@section cli_emsttut Command-Line 'EMST'
-
-The \c mlpack_emst executable in \b mlpack will compute the EMST of a given set
-of points and store the resulting edge list to a file.
-
-The output file contains an edge list representation of the MST in an
-\f$n-1 \times 3 \f$ matrix, where the first and second columns are labels of
-points and the third column is the edge weight.  The edges are sorted in order
-of increasing weight.
-
-Below are several examples of simple usage (and the resultant output).  The
-\c -v option is used so that verbose output is given.  Further documentation on
-each individual option can be found by typing
-
-@code
-$ mlpack_emst --help
-@endcode
-
-@code
-$ mlpack_emst --input_file=dataset.csv --output_file=edge_list.csv -v
-[INFO ] Reading in data.
-[INFO ] Loading 'dataset.csv' as CSV data.
-[INFO ] Data read, building tree.
-[INFO ] Tree built, running algorithm.
-[INFO ] 4 edges found so far.
-[INFO ] 5 edges found so far.
-[INFO ] Total spanning tree length: 1002.45
-[INFO ] Saving CSV data to 'edge_list.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_file: dataset.csv
-[INFO ]   leaf_size: 1
-[INFO ]   naive: false
-[INFO ]   output_file: edge_list.csv
-[INFO ]   verbose: true
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   emst/mst_computation: 0.000179s
-[INFO ]   emst/tree_building: 0.000061s
-[INFO ]   total_time: 0.052641s
-@endcode
-
-The code performs at most \f$\log N\f$ iterations for \f$N\f$ data points.  It will print an update on the number of MST edges found after each iteration.
-Convenient program timers are given for different parts of the calculation at
-the bottom of the output, as well as the parameters the simulation was run with.
-
-@code
-$ cat dataset.csv
-0, 0
-1, 1
-3, 3
-0.5, 0
-1000, 0
-1001, 0
-
-$ cat edge_list.csv
-0.0000000000e+00,3.0000000000e+00,5.0000000000e-01
-4.0000000000e+00,5.0000000000e+00,1.0000000000e+00
-1.0000000000e+00,3.0000000000e+00,1.1180339887e+00
-1.0000000000e+00,2.0000000000e+00,2.8284271247e+00
-2.0000000000e+00,4.0000000000e+00,9.9700451353e+02
-@endcode
-
-The input points are labeled 0-5.  The output tells us that the MST connects
-point 0 to point 3, point 4 to point 5, point 1 to point 3, point 1 to point 2,
-and point 2 to point 4, with the corresponding edge weights given in the third
-column.  The total length of the MST is also given in the verbose output.
-
-Note that it is also possible to compute the EMST using a naive (\f$O(N^2)\f$)
-algorithm for timing and comparison purposes, using the \c --naive option.
-
-@section dtb_emsttut The 'DualTreeBoruvka' class
-
-The 'DualTreeBoruvka' class contains our implementation of the Dual-Tree Boruvka
-algorithm.
-
-The class has two constructors: the first takes the data set, constructs the
-tree (where the type of tree constructed is the TreeType template parameter),
-and computes the MST.  The second takes data set and an already constructed
-tree.
-
-The class provides one method that performs the MST computation:
-@code
-void ComputeMST(const arma::mat& results);
-@endcode
-
-This method stores the computed MST in the matrix results in the format given above.
-
-@section further_doc_emsttut Further documentation
-
-For further documentation on the DualTreeBoruvka class, consult the
-\ref mlpack::emst::DualTreeBoruvka "complete API documentation".
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/emst.md 4.0.1-1/doc/tutorials/emst.md
--- 3.4.2-7/doc/tutorials/emst.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/emst.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,136 @@
+# EMST Tutorial
+
+The Euclidean Minimum Spanning Tree problem is widely used in machine learning
+and data mining applications.  Given a set `S` of points in `R^d`, our task is
+to compute the lowest-weight spanning tree in the complete graph on `S`, with
+edge weights given by the Euclidean distances between points.
+
+Among other applications, the EMST can be used to compute hierarchical
+clusterings of data.  A *single-linkage clustering* can be obtained from the
+EMST by deleting all edges longer than a given cluster length.  This technique
+is also referred to as a *Friends-of-Friends* clustering in the astronomy
+literature.
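+
+As a concrete illustration of that thresholding step, here is a minimal C++
+sketch; it assumes an edge list `edges` in the `(n - 1) x 3` format produced by
+`mlpack_emst` (described below), and the cluster length used is purely
+illustrative.
+
+```c++
+#include <mlpack.hpp>
+
+// The EMST edge list: each row is (point A, point B, edge weight), sorted by
+// increasing weight.
+extern arma::mat edges;
+
+// Illustrative cluster length; all longer edges are deleted.
+const double clusterLength = 2.0;
+
+// The connected components of the graph formed by the remaining edges are the
+// single-linkage clusters.
+arma::mat clusterEdges = edges.rows(arma::find(edges.col(2) <= clusterLength));
+```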
+
+mlpack includes an implementation of ***Dual-Tree Boruvka*** which uses
+kd-trees by default; this is the empirically and theoretically fastest EMST
+algorithm.  In addition, the implementation supports the use of different trees
+via templates.  For more details, see the following paper:
+
+```
+@inproceedings{march2010fast,
+  title={Fast {E}uclidean minimum spanning tree: algorithm, analysis, and
+applications},
+  author={March, William B. and Ram, Parikshit and Gray, Alexander G.},
+  booktitle={Proceedings of the 16th ACM SIGKDD International Conference on
+Knowledge Discovery and Data Mining (KDD '10)},
+  pages={603--612},
+  year={2010},
+  organization={ACM}
+}
+```
+
+mlpack provides:
+
+ - a simple command-line executable to compute the EMST of a given data set
+ - a simple C++ interface to compute the EMST
+
+## Command-line `mlpack_emst`
+
+The `mlpack_emst` program in mlpack will compute the EMST of a given set
+of points and store the resulting edge list to a file.  Note that mlpack also
+has bindings to other languages, so there also exists, e.g., an `emst()`
+function in Python, with similar functions in the other supported languages.
+Although these examples are written for the command-line `mlpack_emst`
+program, it is easy to adapt each of them to another language.
+
+The output file contains an edge list representation of the MST in an `(n - 1) x
+3` matrix, where the first and second columns are labels of points and the third
+column is the edge weight.  The edges are sorted in order of increasing weight.
+
+Below are several examples of simple usage (and the resultant output).  The `-v`
+option is used so that verbose output is given.  Further documentation on each
+individual option can be found by typing
+
+```sh
+$ mlpack_emst --help
+```
+
+```sh
+$ mlpack_emst --input_file=dataset.csv --output_file=edge_list.csv -v
+[INFO ] Reading in data.
+[INFO ] Loading 'dataset.csv' as CSV data.
+[INFO ] Data read, building tree.
+[INFO ] Tree built, running algorithm.
+[INFO ] 4 edges found so far.
+[INFO ] 5 edges found so far.
+[INFO ] Total spanning tree length: 1002.45
+[INFO ] Saving CSV data to 'edge_list.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_file: dataset.csv
+[INFO ]   leaf_size: 1
+[INFO ]   naive: false
+[INFO ]   output_file: edge_list.csv
+[INFO ]   verbose: true
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   emst/mst_computation: 0.000179s
+[INFO ]   emst/tree_building: 0.000061s
+[INFO ]   total_time: 0.052641s
+```
+
+The code performs at most `log N` iterations for `N` data points.  It will print
+an update on the number of MST edges found after each iteration.  Convenient
+program timers are given for different parts of the calculation at the bottom of
+the output, as well as the parameters the program was run with.
+
+```sh
+$ cat dataset.csv
+0, 0
+1, 1
+3, 3
+0.5, 0
+1000, 0
+1001, 0
+
+$ cat edge_list.csv
+0.0000000000e+00,3.0000000000e+00,5.0000000000e-01
+4.0000000000e+00,5.0000000000e+00,1.0000000000e+00
+1.0000000000e+00,3.0000000000e+00,1.1180339887e+00
+1.0000000000e+00,2.0000000000e+00,2.8284271247e+00
+2.0000000000e+00,4.0000000000e+00,9.9700451353e+02
+```
+
+The input points are labeled 0-5.  The output tells us that the MST connects
+point 0 to point 3, point 4 to point 5, point 1 to point 3, point 1 to point 2,
+and point 2 to point 4, with the corresponding edge weights given in the third
+column.  The total length of the MST is also given in the verbose output.
+
+Note that it is also possible to compute the EMST using a naive (`O(N^2)`)
+algorithm for timing and comparison purposes, using the `--naive` option.
+
+## The `DualTreeBoruvka` class
+
+The `DualTreeBoruvka` class contains our implementation of the Dual-Tree Boruvka
+algorithm.
+
+The class has two constructors: the first takes the data set, constructs the
+tree (where the type of tree constructed is given by the `TreeType` template
+parameter), and computes the MST.  The second takes a data set and an
+already-constructed tree.
+
+The class provides one method that performs the MST computation:
+
+```c++
+void ComputeMST(arma::mat& results);
+```
+
+This method stores the computed MST in the matrix `results`, in the format
+given above.
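+
+For illustration, a minimal sketch of computing the EMST from C++ might look
+like the following (assuming a column-major `data` matrix defined elsewhere):
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The dataset on which to compute the EMST, which is column-major.
+extern arma::mat data;
+
+// The constructor builds the tree on the dataset.
+DualTreeBoruvka<> dtb(data);
+
+// Compute the MST; the edge list is stored into 'results' in the format
+// described above.
+arma::mat results;
+dtb.ComputeMST(results);
+```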
+
+## Further documentation
+
+For further documentation on the `DualTreeBoruvka` class, consult the comments
+in the source code, in `mlpack/methods/emst/dtb.hpp`.
diff -pruN 3.4.2-7/doc/tutorials/fastmks/fastmks.txt 4.0.1-1/doc/tutorials/fastmks/fastmks.txt
--- 3.4.2-7/doc/tutorials/fastmks/fastmks.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/fastmks/fastmks.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,599 +0,0 @@
-/*!
-
-@file fastmks.txt
-@author Ryan Curtin
-@brief Tutorial for how to use FastMKS in mlpack.
-
-@page fmkstutorial Fast max-kernel search tutorial (fastmks)
-
-@section intro_fmkstut Introduction
-
-The FastMKS algorithm (fast exact max-kernel search) is a recent algorithm
-proposed in the following papers:
-
-@code
-@inproceedings{curtin2013fast,
-  title={Fast Exact Max-Kernel Search},
-  author={Curtin, Ryan R. and Ram, Parikshit and Gray, Alexander G.},
-  booktitle={Proceedings of the 2013 SIAM International Conference on Data
-      Mining (SDM '13)},
-  year={2013},
-  pages={1--9}
-}
-
-@article{curtin2014dual,
-  author = {Curtin, Ryan R. and Ram, Parikshit},
-  title = {Dual-tree fast exact max-kernel search},
-  journal = {Statistical Analysis and Data Mining},
-  volume = {7},
-  number = {4},
-  publisher = {Wiley Subscription Services, Inc., A Wiley Company},
-  issn = {1932-1872},
-  url = {http://dx.doi.org/10.1002/sam.11218},
-  doi = {10.1002/sam.11218},
-  pages = {229--253},
-  year = {2014},
-}
-@endcode
-
-Given a set of query points \f$Q\f$ and a set of reference points \f$R\f$, the
-FastMKS algorithm is a fast dual-tree (or single-tree) algorithm which finds
-
-\f[
-\arg\max_{p_r \in R} K(p_q, p_r)
-\f]
-
-for all points \f$p_q \in Q\f$ and for some Mercer kernel \f$K(\cdot, \cdot)\f$.
-A Mercer kernel is a kernel that is positive semidefinite; these are the classes
-of kernels that can be used with the kernel trick.  In short, the positive
-semidefiniteness of a Mercer kernel means that any kernel matrix (or Gram
-matrix) created on a dataset must be positive semidefinite.
-
-The FastMKS algorithm builds trees on the datasets \f$Q\f$ and \f$R\f$ in such a
-way that explicit representation of the points in the kernel space is
-unnecessary, by using cover trees (\ref mlpack::tree::CoverTree).  This allows
-the algorithm to be run, for instance, on string kernels, where there is no
-sensible explicit representation.  The \b mlpack implementation allows any type
-of tree that does not require an explicit representation to be used.  For more
-details, see the paper.
-
-At the time of this writing there is no other fast algorithm for exact
-max-kernel search.  \b mlpack implements both single-tree and dual-tree fast
-max-kernel search.
-
-\b mlpack provides:
-
- - a \ref cli_fmkstut "simple command-line executable" to run FastMKS
- - a \ref fastmks_fmkstut "C++ interface" to run FastMKS
-
-@section toc_fmkstut Table of Contents
-
-A list of all the sections this tutorial contains.
-
- - \ref intro_fmkstut
- - \ref toc_fmkstut
- - \ref cli_fmkstut
-   - \ref cli_ex1_fmkstut
-   - \ref cli_ex2_fmkstut
-   - \ref cli_ex3_fmkstut
-   - \ref cli_ex4_fmkstut
-   - \ref cli_ex5_fmkstut
-   - \ref cli_ex6_fmkstut
-   - \ref cli_ex7_fmkstut
- - \ref fastmks_fmkstut
-   - \ref fastmks_ex1_fmkstut
-   - \ref fastmks_ex2_fmkstut
-   - \ref fastmks_ex3_fmkstut
-   - \ref fastmks_ex4_fmkstut
- - \ref writing_kernel_fmkstut
- - \ref custom_tree_fmkstut
- - \ref objects_fmkstut
- - \ref further_doc_fmkstut
-
-@section cli_fmkstut Command-line FastMKS (mlpack_fastmks)
-
-\b mlpack provides a command-line program, \c mlpack_fastmks, which is used to
-perform FastMKS on a given query and reference dataset.  It supports numerous
-different types of kernels:
-
- - \ref mlpack::kernel::LinearKernel "linear kernel"
- - \ref mlpack::kernel::PolynomialKernel "polynomial kernel"
- - \ref mlpack::kernel::CosineDistance "cosine distance"
- - \ref mlpack::kernel::GaussianKernel "Gaussian kernel"
- - \ref mlpack::kernel::EpanechnikovKernel "Epanechnikov kernel"
- - \ref mlpack::kernel::TriangularKernel "triangular kernel"
- - \ref mlpack::kernel::HyperbolicTangentKernel "hyperbolic tangent kernel"
-
-Note that when a shift-invariant kernel is used, the results will be the same as
-nearest neighbor search, so @ref nstutorial "KNN" may be a better option.  A
-shift-invariant kernel is a kernel that depends only on the distance between the
-two input points.  The \ref mlpack::kernel::GaussianKernel "Gaussian kernel",
-\ref mlpack::kernel::EpanechnikovKernel "Epanechnikov kernel", and \ref
-mlpack::kernel::TriangularKernel "triangular kernel" are instances of
-shift-invariant kernels.  The paper contains more details on this situation.
-The \c mlpack_fastmks executable still provides these kernels as options,
-though.
-
-The following examples detail usage of the \c mlpack_fastmks program.  Note that
-you can get documentation on all the possible parameters by typing:
-
-@code
-$ mlpack_fastmks --help
-@endcode
-
-@subsection cli_ex1_fmkstut FastMKS with a linear kernel on one dataset
-
-If only one dataset is specified (with \c -r or \c --reference_file), the
-reference dataset is taken to be both the query and reference datasets.  The
-example below finds the 4 maximum kernels of each point in dataset.csv, using
-the default linear kernel.
-
-@code
-$ mlpack_fastmks -r dataset.csv -k 4 -v -p products.csv -i indices.csv
-@endcode
-
-When the operation completes, the values of the kernels are saved in
-products.csv and the indices of the points which give the maximum kernels are
-saved in indices.csv.
-
-@code
-$ head indices.csv
-762,910,863,890
-762,910,426,568
-910,762,863,426
-762,910,863,426
-863,910,614,762
-762,863,910,614
-762,910,488,568
-762,910,863,426
-910,762,863,426
-863,762,910,614
-@endcode
-
-@code
-$ head products.csv
-1.6221652894e+00,1.5998743443e+00,1.5898890769e+00,1.5406789753e+00
-1.3387953449e+00,1.3317349486e+00,1.2966613184e+00,1.2774493620e+00
-1.6386110476e+00,1.6332029753e+00,1.5952629124e+00,1.5887195330e+00
-1.0917545803e+00,1.0820878726e+00,1.0668992636e+00,1.0419838050e+00
-1.2272441028e+00,1.2169643942e+00,1.2104597963e+00,1.2067780154e+00
-1.5720962456e+00,1.5618504956e+00,1.5609069923e+00,1.5235605095e+00
-1.3655478674e+00,1.3548593212e+00,1.3311547298e+00,1.3250728881e+00
-2.0119149744e+00,2.0043668067e+00,1.9847289214e+00,1.9298280046e+00
-1.1586923205e+00,1.1494586097e+00,1.1274872962e+00,1.1248172766e+00
-4.4789820372e-01,4.4618539778e-01,4.4200024852e-01,4.3989721792e-01
-@endcode
-
-We can see in this example that for point 0, the point with maximum kernel value
-is point 762, with a kernel value of 1.622165.  For point 3, the point with
-third largest kernel value is point 863, with a kernel value of 1.0669.
-
-@subsection cli_ex2_fmkstut FastMKS on a reference and query dataset
-
-The query points may be different than the reference points.  To specify a
-different query set, the \c -q (or \c --query_file) option is used, as in the
-example below.
-
-@code
-$ mlpack_fastmks -q query_set.csv -r reference_set.csv -k 5 -i indices.csv \
-> -p products.csv
-@endcode
-
-@subsection cli_ex3_fmkstut FastMKS with a different kernel
-
-The \c mlpack_fastmks program offers more than just the linear kernel.  Valid
-options are \c 'linear', \c 'polynomial', \c 'cosine', \c 'gaussian',
-\c 'epanechnikov', \c 'triangular' and \c 'hyptan' (the hyperbolic tangent
-kernel).  Note that the hyperbolic tangent kernel is provably not a Mercer
-kernel but is positive semidefinite on most datasets and is commonly used as a
-kernel.  Note also that the Gaussian kernel and other shift-invariant kernels
-give the same results as nearest neighbor search (see \ref nstutorial).
-
-The kernel to use is specified with the \c -K (or \c --kernel) option.  The
-example below uses the cosine similarity as a kernel.
-
-@code
-$ mlpack_fastmks -r dataset.csv -k 5 -K cosine -i indices.csv -p products.csv -v
-@endcode
-
-@subsection cli_ex4_fmkstut Using single-tree search or naive search
-
-In some cases, it may be useful to not use the dual-tree FastMKS algorithm.
-Instead you can specify the \c --single option, indicating that a tree should be
-built only on the reference set, and then the queries should be processed in a
-linear scan (instead of in a tree).  Alternately, the \c -N (or \c --naive)
-option makes the program not build trees at all and instead use brute-force
-search to find the solutions.
-
-The example below uses single-tree search on two datasets with the linear
-kernel.
-
-@code
-$ mlpack_fastmks -q query_set.csv -r reference_set.csv --single -k 5 \
-> -p products.csv -i indices.csv -K linear
-@endcode
-
-The example below uses naive search on one dataset.
-
-@code
-$ mlpack_fastmks -r reference_set.csv -k 5 -N -p products.csv -i indices.csv
-@endcode
-
-@subsection cli_ex5_fmkstut Parameters for alternate kernels
-
-Many of the alternate kernel choices have parameters which can be chosen; these
-are detailed in this section.
-
- - \b \c -w (\c --bandwidth): this sets the bandwidth of the kernel, and is
-   applicable to the \c 'gaussian', \c 'epanechnikov', and \c 'triangular'
-   kernels.  This is the "spread" of the kernel.
-
- - \b \c -d (\c --degree): this sets the degree of the polynomial kernel (the
-   power to which the result is raised).  It is only applicable to the \c
-   'polynomial' kernel.
-
- - \b \c -o (\c --offset): this sets the offset of the kernel, for the \c
-   'polynomial' and \c 'hyptan' kernel.  See \ref
-   mlpack::kernel::PolynomialKernel "the polynomial kernel documentation" and
-   \ref mlpack::kernel::HyperbolicTangentKernel
-   "the hyperbolic tangent kernel documentation" for more information.
-
- - \b \c -s (\c --scale): this sets the scale of the kernel, and is only
-   applicable to the \c 'hyptan' kernel.  See \ref
-   mlpack::kernel::HyperbolicTangentKernel
-   "the hyperbolic tangent kernel documentation" for more information.
-
-@subsection cli_ex6_fmkstut Saving a FastMKS model/tree
-
-The \c mlpack_fastmks program also supports saving a model built on a reference
-dataset (this model includes the tree, the kernel, and the search parameters).
-The \c --output_model_file or \c -M option allows one to save these parameters
-to disk for later usage.  An example is below:
-
-@code
-$ mlpack_fastmks -r reference_set.csv -K cosine -M fastmks_model.xml
-@endcode
-
-This example builds a tree on the dataset in \c reference_set.csv using the
-cosine similarity kernel, and saves the resulting model to \c fastmks_model.xml.
-This model may then be used in later calls to the \c mlpack_fastmks program.
-
-@subsection cli_ex7_fmkstut Loading a FastMKS model for further searches
-
-Supposing that a FastMKS model has been saved with the \c --output_model_file or
-\c -M parameter, that model can then be later loaded in subsequent calls to the
-\c mlpack_fastmks program, using the \c --input_model_file or \c -m option.  For
-instance, with a model saved in \c fastmks_model.xml and a query set in
-\c query_set.csv, we can find 3 max-kernel candidates, saving to \c indices.csv
-and \c kernels.csv:
-
-@code
-$ mlpack_fastmks -m fastmks_model.xml -k 3 -i indices.csv -p kernels.csv
-@endcode
-
-Loading a model as opposed to building a model is advantageous because the
-reference tree is already built.  So, among other situations, this could be
-useful in the setting where many different query sets (or many different values
-of k) will be used.
-
-Note that the kernel cannot be changed in a saved model without rebuilding the
-model entirely.
-
-@section fastmks_fmkstut The 'FastMKS' class
-
-The \c FastMKS<> class offers a simple API for use within C++ applications, and
-allows further flexibility in kernel choice and tree type choice.  However,
-\c FastMKS<> has no default template parameter for the kernel type -- that must
-be manually specified.  Choices that \b mlpack provides include:
-
- - \ref mlpack::kernel::LinearKernel
- - \ref mlpack::kernel::PolynomialKernel
- - \ref mlpack::kernel::CosineDistance
- - \ref mlpack::kernel::GaussianKernel
- - \ref mlpack::kernel::EpanechnikovKernel
- - \ref mlpack::kernel::TriangularKernel
- - \ref mlpack::kernel::HyperbolicTangentKernel
- - \ref mlpack::kernel::LaplacianKernel
- - \ref mlpack::kernel::PSpectrumStringKernel
-
-The following examples use kernels from that list.  Writing your own kernel is
-detailed in \ref writing_kernel_fmkstut "the next section".  Remember that when
-you are using the C++ interface, the data matrices must be column-major.  See
-\ref matrices for more information.
-
-@subsection fastmks_ex1_fmkstut FastMKS on one dataset
-
-Given only a reference dataset, the following code will run FastMKS with k set
-to 5.
-
-@code
-#include <mlpack/methods/fastmks/fastmks.hpp>
-#include <mlpack/core/kernels/linear_kernel.hpp>
-
-using namespace mlpack::fastmks;
-
-// The reference dataset, which is column-major.
-extern arma::mat data;
-
-// This will initialize the FastMKS object with the linear kernel with default
-// options: K(x, y) = x^T y.  The tree is built in the constructor.
-FastMKS<LinearKernel> f(data);
-
-// The results will be stored in these matrices.
-arma::Mat<size_t> indices;
-arma::mat products;
-
-// Run FastMKS.
-f.Search(5, indices, products);
-@endcode
-
-@subsection fastmks_ex2_fmkstut FastMKS with a query and reference dataset
-
-In this setting we have both a query and reference dataset.  We search for 10
-maximum kernels.
-
-@code
-#include <mlpack/methods/fastmks/fastmks.hpp>
-#include <mlpack/core/kernels/triangular_kernel.hpp>
-
-using namespace mlpack::fastmks;
-using namespace mlpack::kernel;
-
-// The reference and query datasets, which are column-major.
-extern arma::mat referenceData;
-extern arma::mat queryData;
-
-// This will initialize the FastMKS object with the triangular kernel with
-// default options (bandwidth of 1).  The reference tree is built in the
-// constructor.
-FastMKS<TriangularKernel> f(referenceData);
-
-// The results will be stored in these matrices.
-arma::Mat<size_t> indices;
-arma::mat products;
-
-// Run FastMKS.  The query tree is built during the call to Search().
-f.Search(queryData, 10, indices, products);
-@endcode
-
-@subsection fastmks_ex3_fmkstut FastMKS with an initialized kernel
-
-Often, kernels have parameters which need to be specified.  \c FastMKS<> has
-constructors which take initialized kernels.  Note that temporary kernels cannot
-be passed as an argument.  The example below initializes a \c PolynomialKernel
-object and then runs FastMKS with a query and reference dataset.
-
-@code
-#include <mlpack/methods/fastmks/fastmks.hpp>
-#include <mlpack/core/kernels/polynomial_kernel.hpp>
-
-using namespace mlpack::fastmks;
-using namespace mlpack::kernel;
-
-// The reference and query datasets, which are column-major.
-extern arma::mat referenceData;
-extern arma::mat queryData;
-
-// Initialize the polynomial kernel with degree of 3 and offset of 2.5.
-PolynomialKernel pk(3.0, 2.5);
-
-// Create the FastMKS object with the initialized kernel.
-FastMKS<PolynomialKernel> f(referenceData, pk);
-
-// The results will be stored in these matrices.
-arma::Mat<size_t> indices;
-arma::mat products;
-
-// Run FastMKS.
-f.Search(queryData, 10, indices, products);
-@endcode
-
-The syntax for running FastMKS with one dataset and an initialized kernel is
-very similar:
-
-@code
-f.Search(10, indices, products);
-@endcode
-
-@subsection fastmks_ex4_fmkstut FastMKS with an already-created tree
-
-By default, \c FastMKS<> uses the cover tree datastructure (see \ref
-mlpack::tree::CoverTree).  Sometimes, it is useful to modify the parameters of
-the cover tree.  In this scenario, a tree must be built outside of the
-constructor, and then passed to the appropriate \c FastMKS<> constructor.  An
-example on just a reference dataset is shown below, where the base of the cover
-tree is modified.
-
-We also use an instantiated kernel, but because we are building our own tree, we
-must use \ref mlpack::metric::IPMetric "IPMetric" so that our tree is built on
-the metric induced by our kernel function.
-
-@code
-#include <mlpack/methods/fastmks/fastmks.hpp>
-#include <mlpack/core/kernels/polynomial_kernel.hpp>
-
-// The reference dataset, which is column-major.
-extern arma::mat data;
-
-// Initialize the polynomial kernel with a degree of 4 and offset of 2.0.
-PolynomialKernel pk(4.0, 2.0);
-
-// Create the metric induced by this kernel (because a kernel is not a metric
-// and we can't build a tree on a kernel alone).
-IPMetric<PolynomialKernel> metric(pk);
-
-// Now build a tree on the reference dataset using the instantiated metric and
-// the custom base of 1.5 (default is 1.3).  We have to be sure to use the right
-// type here -- FastMKS needs the FastMKSStat object as the tree's
-// StatisticType.
-typedef tree::CoverTree<IPMetric<PolynomialKernel>, tree::FirstPointIsRoot,
-    FastMKSStat> TreeType; // Convenience typedef.
-TreeType* tree = new TreeType(data, metric, 1.5);
-
-// Now initialize FastMKS with that statistic.  We don't need to specify the
-// TreeType template parameter since we are still using the default.  We don't
-// need to pass the kernel because that is contained in the tree.
-FastMKS<PolynomialKernel> f(tree);
-
-// The results will be stored in these matrices.
-arma::Mat<size_t> indices;
-arma::mat products;
-
-// Run FastMKS.
-f.Search(10, indices, products);
-@endcode
-
-The syntax is similar for the case where different query and reference datasets
-are given; but trees for both need to be built in the manner specified above.
-Be sure to build both trees using the same metric (or at least a metric with the
-exact same parameters).
-
-@code
-f.Search(queryTree, 10, indices, products);
-@endcode
-
-@section writing_kernel_fmkstut Writing a custom kernel for FastMKS
-
-While \b mlpack provides some number of kernels in the mlpack::kernel namespace,
-it is easy to create a custom kernel.  To satisfy the KernelType policy, a class
-must implement the following methods:
-
-@code
-// Empty constructor is required.
-KernelType();
-
-// Evaluate the kernel between two points.
-template<typename VecType>
-double Evaluate(const VecType& a, const VecType& b);
-@endcode
-
-The template parameter \c VecType is helpful (but not necessary) so that the
-kernel can be used with both sparse and dense matrices (\c arma::sp_mat and \c
-arma::mat).
-
-@section custom_tree_fmkstut Using other tree types for FastMKS
-
-The use of the cover tree (see \ref mlpack::tree::CoverTree "CoverTree") is not
-necessary for FastMKS, although it is the default tree type.  A different type
-of tree can be specified with the TreeType template parameter.  However, the
-tree type is required to have \ref mlpack::fastmks::FastMKSStat "FastMKSStat" as
-the StatisticType, and for FastMKS to work, the tree must be built only on
-kernel evaluations (or distance evaluations in the kernel space via
-\ref mlpack::metric::IPMetric "IPMetric::Evaluate()").
-
-Below is an example where a custom tree class, \c CustomTree, is used as the
-tree type for FastMKS.  In this example FastMKS is only run on one dataset.
-
-@code
-#include <mlpack/methods/fastmks/fastmks.hpp>
-#include "custom_tree.hpp"
-
-using namespace mlpack::fastmks;
-using namespace mlpack::tree;
-
-// The dataset that FastMKS will be run on.
-extern arma::mat data;
-
-// The custom tree type.  We'll assume that the first template parameter is the
-// statistic type.
-typedef CustomTree<FastMKSStat> TreeType;
-
-// The FastMKS constructor will create the tree.
-FastMKS<LinearKernel, arma::mat, TreeType> f(data);
-
-// These will hold the results.
-arma::Mat<size_t> indices;
-arma::mat products;
-
-// Run FastMKS.
-f.Search(5, indices, products);
-@endcode
-
-@section objects_fmkstut Running FastMKS on objects
-
-FastMKS has a lot of utility on objects which are not representable in some sort
-of metric space.  These objects might be strings, graphs, models, or other
-objects.  For these types of objects, questions based on distance don't really
-make sense.  One good example is with strings.  The question "how far is 'dog'
-from 'Taki Inoue'?" simply doesn't make sense.  We can't have a centroid of the
-terms 'Fritz', 'E28', and 'popsicle'.
-
-However, what we can do is define some sort of kernel on these objects.  These
-kernels generally correspond to some similarity measure, with one example being
-the p-spectrum string kernel (see \ref mlpack::kernel::PSpectrumStringKernel).
-Using that, we can say "how similar is 'dog' to 'Taki Inoue'?" and get an actual
-numerical result by evaluating K('dog', 'Taki Inoue') (where K is our p-spectrum
-string kernel).
-
-The only requirement on these kernels is that they are positive definite kernels
-(or Mercer kernels).  For more information on those details, refer to the
-FastMKS paper.
-
-Remember that FastMKS is a tree-based method.  But trees like the binary space
-tree require centroids -- and as we said earlier, centroids often don't make
-sense with these types of objects.  Therefore, we need a type of tree which is
-built \b exclusively on points in the dataset -- those are points which we can
-evaluate our kernel function on.  The cover tree is one example of a type of
-tree satisfying this condition; its construction will only call the kernel
-function on two points that are in the dataset.
-
-But, we have one more problem.  The \c CoverTree class is built on \c arma::mat
-objects (dense matrices).  Our objects, however, are not necessarily
-representable in a column of a matrix.  To use the example we have been using,
-strings cannot be represented easily in a matrix because they may all have
-different lengths.
-
-The way to work around this problem is to create a "fake" data matrix which
-simply holds indices to objects.  A good example of how to do this is detailed
-in the documentation for the \ref mlpack::kernel::PSpectrumStringKernel
-"PSpectrumStringKernel".
-
-In short, the trick is to make each data matrix one-dimensional and containing
-linear indices:
-
-@code
-arma::mat data = "0 1 2 3 4 5 6 7 8";
-@endcode
-
-Then, when \c Evaluate() is called on the kernel function, the parameters will
-be two one-dimensional vectors that simply contain indices to objects.  The
-example below details the process a little better:
-
-@code
-// This function evaluates the kernel on two Objects (in this example, its
-// implementation is not important; the only important thing is that the
-// function exists).
-double ObjectKernel::Evaluate(const Object& a, const Object& b) const;
-
-template<typename VecType>
-double ObjectKernel::Evaluate(const VecType& a, const VecType& b) const
-{
-  // Extract the indices from the vectors.
-  const size_t indexA = size_t(a[0]);
-  const size_t indexB = size_t(b[0]);
-
-  // Assume that 'objects' is an array (or std::vector or other container)
-  // holding Objects.
-  const Object& objectA = objects[indexA];
-  const Object& objectB = objects[indexB];
-
-  // Now call the function that does the actual evaluation on the objects and
-  // return its result.
-  return Evaluate(objectA, objectB);
-}
-@endcode
-
-As written earlier, the documentation for \ref
-mlpack::kernel::PSpectrumStringKernel "PSpectrumStringKernel" is a good place to
-consult for further reference on this.  That kernel uses two dimensional
-indices; one dimension represents the index of the string, and the other
-represents whether it is referring to the query set or the reference set.  If
-your kernel is meant to work on separate query and reference sets, that strategy
-should be considered.
-
-@section further_doc_fmkstut Further documentation
-
-For further documentation on the FastMKS class, consult the \ref
-mlpack::fastmks::FastMKS "complete API documentation".
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/fastmks.md 4.0.1-1/doc/tutorials/fastmks.md
--- 3.4.2-7/doc/tutorials/fastmks.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/fastmks.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,551 @@
+# Fast max-kernel search tutorial (FastMKS)
+
+The FastMKS algorithm (fast exact max-kernel search) is a recent algorithm
+proposed in the following papers:
+
+```
+@inproceedings{curtin2013fast,
+  title={Fast Exact Max-Kernel Search},
+  author={Curtin, Ryan R. and Ram, Parikshit and Gray, Alexander G.},
+  booktitle={Proceedings of the 2013 SIAM International Conference on Data
+      Mining (SDM '13)},
+  year={2013},
+  pages={1--9}
+}
+
+@article{curtin2014dual,
+  author = {Curtin, Ryan R. and Ram, Parikshit},
+  title = {Dual-tree fast exact max-kernel search},
+  journal = {Statistical Analysis and Data Mining},
+  volume = {7},
+  number = {4},
+  publisher = {Wiley Subscription Services, Inc., A Wiley Company},
+  issn = {1932-1872},
+  url = {http://dx.doi.org/10.1002/sam.11218},
+  doi = {10.1002/sam.11218},
+  pages = {229--253},
+  year = {2014},
+}
+```
+
+Given a set of query points `Q` and a set of reference points `R`, the FastMKS
+algorithm is a fast dual-tree (or single-tree) algorithm which finds
+
+```
+argmax_{p_r in R} K(p_q, p_r)
+```
+
+for all points `p_q` in `Q` and for some Mercer kernel `K()`.  A Mercer kernel
+is a kernel that is positive semidefinite; these are the classes of kernels that
+can be used with the kernel trick.  In short, the positive semidefiniteness of a
+Mercer kernel means that any kernel matrix (or Gram matrix) created on a dataset
+must be positive semidefinite.
+
+The FastMKS algorithm builds trees on the datasets `Q` and `R` in such a way
+that explicit representation of the points in the kernel space is unnecessary,
+by using cover trees (see `CoverTree`).  This allows the algorithm to be run,
+for instance, on string kernels, where there is no sensible explicit
+representation.  The mlpack implementation allows any type of tree that does not
+require an explicit representation to be used.  For more details, see the paper.
+
+At the time of this writing there is no other fast algorithm for exact
+max-kernel search.  mlpack implements both single-tree and dual-tree fast
+max-kernel search.
+
+mlpack provides:
+
+ - a simple command-line executable to run FastMKS
+ - a C++ interface to run FastMKS
+
+## Command-line FastMKS (`mlpack_fastmks`)
+
+mlpack provides a command-line program, `mlpack_fastmks`, which is used to
+perform FastMKS on a given query and reference dataset.  It supports numerous
+different types of kernels:
+
+ - `LinearKernel`
+ - `PolynomialKernel`
+ - `CosineDistance`
+ - `GaussianKernel`
+ - `EpanechnikovKernel`
+ - `TriangularKernel`
+ - `HyperbolicTangentKernel`
+
+Note that when a shift-invariant kernel is used, the results will be the same as
+nearest neighbor search, so [KNN](neighbor_search.md) may be a better option.  A
+shift-invariant kernel is a kernel that depends only on the distance between the
+two input points.  The `GaussianKernel`, `EpanechnikovKernel`, and
+`TriangularKernel` are instances of shift-invariant kernels.  The paper contains
+more details on this situation.  The `mlpack_fastmks` executable still provides
+these kernels as options, though.
+
+The following examples detail usage of the `mlpack_fastmks` program.  Note that
+you can get documentation on all the possible parameters by typing:
+
+```sh
+$ mlpack_fastmks --help
+```
+
+### FastMKS with a linear kernel on one dataset
+
+If only one dataset is specified (with `-r` or `--reference_file`), the
+reference dataset is taken to be both the query and reference datasets.  The
+example below finds the 4 maximum kernels of each point in `dataset.csv`, using
+the default linear kernel.
+
+```sh
+$ mlpack_fastmks -r dataset.csv -k 4 -v -p products.csv -i indices.csv
+```
+
+When the operation completes, the values of the kernels are saved in
+`products.csv` and the indices of the points which give the maximum kernels are
+saved in `indices.csv`.
+
+```sh
+$ head indices.csv
+762,910,863,890
+762,910,426,568
+910,762,863,426
+762,910,863,426
+863,910,614,762
+762,863,910,614
+762,910,488,568
+762,910,863,426
+910,762,863,426
+863,762,910,614
+```
+
+```sh
+$ head products.csv
+1.6221652894e+00,1.5998743443e+00,1.5898890769e+00,1.5406789753e+00
+1.3387953449e+00,1.3317349486e+00,1.2966613184e+00,1.2774493620e+00
+1.6386110476e+00,1.6332029753e+00,1.5952629124e+00,1.5887195330e+00
+1.0917545803e+00,1.0820878726e+00,1.0668992636e+00,1.0419838050e+00
+1.2272441028e+00,1.2169643942e+00,1.2104597963e+00,1.2067780154e+00
+1.5720962456e+00,1.5618504956e+00,1.5609069923e+00,1.5235605095e+00
+1.3655478674e+00,1.3548593212e+00,1.3311547298e+00,1.3250728881e+00
+2.0119149744e+00,2.0043668067e+00,1.9847289214e+00,1.9298280046e+00
+1.1586923205e+00,1.1494586097e+00,1.1274872962e+00,1.1248172766e+00
+4.4789820372e-01,4.4618539778e-01,4.4200024852e-01,4.3989721792e-01
+```
+
+We can see in this example that for point 0, the point with maximum kernel value
+is point 762, with a kernel value of 1.622165.  For point 3, the point with
+third largest kernel value is point 863, with a kernel value of 1.0669.
+
+### FastMKS on a reference and query dataset
+
+The query points may be different than the reference points.  To specify a
+different query set, the `-q` (or `--query_file`) option is used, as in the
+example below.
+
+```sh
+$ mlpack_fastmks -q query_set.csv -r reference_set.csv -k 5 -i indices.csv \
+> -p products.csv
+```
+
+### FastMKS with a different kernel
+
+The `mlpack_fastmks` program offers more than just the linear kernel.  Valid
+options are `'linear'`, `'polynomial'`, `'cosine'`, `'gaussian'`,
+`'epanechnikov'`, `'triangular'` and `'hyptan'` (the hyperbolic tangent kernel).
+Note that the hyperbolic tangent kernel is provably not a Mercer kernel but is
+positive semidefinite on most datasets and is commonly used as a kernel.  Note
+also that the Gaussian kernel and other shift-invariant kernels give the same
+results as nearest neighbor search (see [the tutorial](neighbor_search.md)).
+
+The kernel to use is specified with the `-K` (or `--kernel`) option.  The
+example below uses the cosine similarity as a kernel.
+
+```sh
+$ mlpack_fastmks -r dataset.csv -k 5 -K cosine -i indices.csv -p products.csv -v
+```
+
+### Using single-tree search or naive search
+
+In some cases, it may be useful to not use the dual-tree FastMKS algorithm.
+Instead you can specify the `--single` option, indicating that a tree should be
+built only on the reference set, and then the queries should be processed in a
+linear scan (instead of in a tree).  Alternately, the `-N` (or `--naive`) option
+makes the program not build trees at all and instead use brute-force search to
+find the solutions.
+
+The example below uses single-tree search on two datasets with the linear
+kernel.
+
+```sh
+$ mlpack_fastmks -q query_set.csv -r reference_set.csv --single -k 5 \
+> -p products.csv -i indices.csv -K linear
+```
+
+The example below uses naive search on one dataset.
+
+```sh
+$ mlpack_fastmks -r reference_set.csv -k 5 -N -p products.csv -i indices.csv
+```
+
+### Parameters for alternate kernels
+
+Many of the alternate kernel choices have parameters which can be chosen; these
+are detailed in this section.
+
+ - `-w` (`--bandwidth`): this sets the bandwidth of the kernel, and is
+   applicable to the `'gaussian'`, `'epanechnikov'`, and `'triangular'` kernels.
+   This is the "spread" of the kernel.
+
+ - `-d` (`--degree`): this sets the degree of the polynomial kernel (the power
+   to which the result is raised).  It is only applicable to the `'polynomial'`
+   kernel.
+
+ - `-o` (`--offset`): this sets the offset of the kernel, for the
+   `'polynomial'` and `'hyptan'` kernel.  See the documentation for
+   `PolynomialKernel` and `HyperbolicTangentKernel` for more information.
+
+ - `-s` (`--scale`): this sets the scale of the kernel, and is only applicable
+   to the `'hyptan'` kernel.  See the documentation for
+   `HyperbolicTangentKernel` for more information.
+
+### Saving a FastMKS model/tree
+
+The `mlpack_fastmks` program also supports saving a model built on a reference
+dataset (this model includes the tree, the kernel, and the search parameters).
+The `--output_model_file` or `-M` option allows one to save these parameters to
+disk for later usage.  An example is below:
+
+```sh
+$ mlpack_fastmks -r reference_set.csv -K cosine -M fastmks_model.xml
+```
+
+This example builds a tree on the dataset in `reference_set.csv` using the
+cosine similarity kernel, and saves the resulting model to `fastmks_model.xml`.
+This model may then be used in later calls to the `mlpack_fastmks` program.
+
+### Loading a FastMKS model for further searches
+
+Supposing that a FastMKS model has been saved with the `--output_model_file` or
+`-M` parameter, that model can then be later loaded in subsequent calls to the
+`mlpack_fastmks` program, using the `--input_model_file` or `-m` option.  For
+instance, with a model saved in `fastmks_model.xml` and a query set in
+`query_set.csv`, we can find 3 max-kernel candidates, saving to `indices.csv`
+and `kernels.csv`:
+
+```sh
+$ mlpack_fastmks -m fastmks_model.xml -k 3 -i indices.csv -p kernels.csv
+```
+
+Loading a model as opposed to building a model is advantageous because the
+reference tree is already built.  So, among other situations, this could be
+useful in the setting where many different query sets (or many different values
+of `k`) will be used.
+
+Note that the kernel cannot be changed in a saved model without rebuilding the
+model entirely.
+
+## The `FastMKS` class
+
+The `FastMKS<>` class offers a simple API for use within C++ applications, and
+allows further flexibility in kernel choice and tree type choice.  However,
+`FastMKS<>` has no default template parameter for the kernel type---that must be
+manually specified.  Choices that mlpack provides include:
+
+ - `LinearKernel`
+ - `PolynomialKernel`
+ - `CosineDistance`
+ - `GaussianKernel`
+ - `EpanechnikovKernel`
+ - `TriangularKernel`
+ - `HyperbolicTangentKernel`
+ - `LaplacianKernel`
+ - `PSpectrumStringKernel`
+
+The following examples use kernels from that list.  Writing your own kernel is
+detailed in the next section.  Remember that when you are using the C++
+interface, the data matrices must be column-major.  See the [matrices
+documentation](../user/matrices.md) for more information.
+
+### `FastMKS` on one dataset
+
+Given only a reference dataset, the following code will run FastMKS with `k`
+set to 5.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset, which is column-major.
+extern arma::mat data;
+
+// This will initialize the FastMKS object with the linear kernel with default
+// options: K(x, y) = x^T y.  The tree is built in the constructor.
+FastMKS<LinearKernel> f(data);
+
+// The results will be stored in these matrices.
+arma::Mat<size_t> indices;
+arma::mat products;
+
+// Run FastMKS.
+f.Search(5, indices, products);
+```
+
+### FastMKS with a query and reference dataset
+
+In this setting we have both a query and reference dataset.  We search for 10
+maximum kernels.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference and query datasets, which are column-major.
+extern arma::mat referenceData;
+extern arma::mat queryData;
+
+// This will initialize the FastMKS object with the triangular kernel with
+// default options (bandwidth of 1).  The reference tree is built in the
+// constructor.
+FastMKS<TriangularKernel> f(referenceData);
+
+// The results will be stored in these matrices.
+arma::Mat<size_t> indices;
+arma::mat products;
+
+// Run FastMKS.  The query tree is built during the call to Search().
+f.Search(queryData, 10, indices, products);
+```
+
+### FastMKS with an initialized kernel
+
+Often, kernels have parameters which need to be specified.  `FastMKS<>` has
+constructors which take initialized kernels.  Note that temporary kernels cannot
+be passed as an argument.  The example below initializes a `PolynomialKernel`
+object and then runs FastMKS with a query and reference dataset.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference and query datasets, which are column-major.
+extern arma::mat referenceData;
+extern arma::mat queryData;
+
+// Initialize the polynomial kernel with degree of 3 and offset of 2.5.
+PolynomialKernel pk(3.0, 2.5);
+
+// Create the FastMKS object with the initialized kernel.
+FastMKS<PolynomialKernel> f(referenceData, pk);
+
+// The results will be stored in these matrices.
+arma::Mat<size_t> indices;
+arma::mat products;
+
+// Run FastMKS.
+f.Search(queryData, 10, indices, products);
+```
+
+The syntax for running FastMKS with one dataset and an initialized kernel is
+very similar:
+
+```c++
+f.Search(10, indices, products);
+```
+
+### FastMKS with an already-created tree
+
+By default, `FastMKS<>` uses the cover tree data structure (see the `CoverTree`
+documentation).  Sometimes, it is useful to modify the parameters of the cover
+tree.  In this scenario, a tree must be built outside of the constructor, and
+then passed to the appropriate `FastMKS<>` constructor.  An example on just a
+reference dataset is shown below, where the base of the cover tree is modified.
+
+We also use an instantiated kernel, but because we are building our own tree, we
+must use `IPMetric` so that our tree is built on the metric induced by our
+kernel function.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The reference dataset, which is column-major.
+extern arma::mat data;
+
+// Initialize the polynomial kernel with a degree of 4 and offset of 2.0.
+PolynomialKernel pk(4.0, 2.0);
+
+// Create the metric induced by this kernel (because a kernel is not a metric
+// and we can't build a tree on a kernel alone).
+IPMetric<PolynomialKernel> metric(pk);
+
+// Now build a tree on the reference dataset using the instantiated metric and
+// the custom base of 1.5 (default is 1.3).  We have to be sure to use the right
+// type here -- FastMKS needs the FastMKSStat object as the tree's
+// StatisticType.
+typedef CoverTree<IPMetric<PolynomialKernel>, FastMKSStat>
+    TreeType; // Convenience typedef.
+TreeType* tree = new TreeType(data, metric, 1.5);
+
+// Now initialize FastMKS with that statistic.  We don't need to specify the
+// TreeType template parameter since we are still using the default.  We don't
+// need to pass the kernel because that is contained in the tree.
+FastMKS<PolynomialKernel> f(tree);
+
+// The results will be stored in these matrices.
+arma::Mat<size_t> indices;
+arma::mat products;
+
+// Run FastMKS.
+f.Search(10, indices, products);
+```
+
+The syntax is similar for the case where different query and reference datasets
+are given, but trees for both need to be built in the manner specified above.
+Be sure to build both trees using the same metric (or at least a metric with the
+exact same parameters).
+
+```c++
+f.Search(queryTree, 10, indices, products);
+```
+
+### Writing a custom kernel for FastMKS
+
+While mlpack provides a number of built-in kernels, it is easy to create a
+custom kernel.  To satisfy the [KernelType
+policy](../developer/kernels.md), a class must implement the following methods:
+
+```c++
+// Empty constructor is required.
+KernelType();
+
+// Evaluate the kernel between two points.
+template<typename VecType>
+double Evaluate(const VecType& a, const VecType& b);
+```
+
+The template parameter `VecType` is helpful (but not necessary) so that the
+kernel can be used with both sparse and dense matrices (`arma::sp_mat` and
+`arma::mat`).
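+
+As a concrete illustration, the sketch below implements this policy with a
+hypothetical `QuadraticKernel` (not a kernel that mlpack provides; it is just
+the polynomial kernel with degree 2 and no offset, written out by hand):
+
+```c++
+#include <mlpack.hpp>
+
+// A hypothetical kernel satisfying the KernelType policy:
+// K(a, b) = (a^T b)^2.
+class QuadraticKernel
+{
+ public:
+  // The required empty constructor.
+  QuadraticKernel() { }
+
+  // Evaluate the kernel between two points; the VecType template parameter
+  // allows both dense and sparse vectors.
+  template<typename VecType>
+  double Evaluate(const VecType& a, const VecType& b)
+  {
+    const double d = arma::dot(a, b);
+    return d * d;
+  }
+};
+```
+
+Such a kernel could then be used directly as the template parameter, e.g.,
+`FastMKS<QuadraticKernel> f(data);`.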
+
+### Using other tree types for FastMKS
+
+The use of the cover tree is not necessary for FastMKS, although it is the
+default tree type.  A different type of tree can be specified with the
+`TreeType` template parameter.  However, the tree type is required to have
+`FastMKSStat` as the `StatisticType`, and for FastMKS to work, the tree must be
+built only on kernel evaluations (or distance evaluations in the kernel space
+via `IPMetric::Evaluate()`).
+
+Below is an example where a custom tree class, `CustomTree`, is used as the
+tree type for FastMKS.  In this example FastMKS is only run on one dataset.
+
+```c++
+#include <mlpack.hpp>
+#include "custom_tree.hpp"
+
+using namespace mlpack;
+
+// The dataset that FastMKS will be run on.
+extern arma::mat data;
+
+// The custom tree type.  We'll assume that the first template parameter is the
+// statistic type.
+typedef CustomTree<FastMKSStat> TreeType;
+
+// The FastMKS constructor will create the tree.
+FastMKS<LinearKernel, arma::mat, TreeType> f(data);
+
+// These will hold the results.
+arma::Mat<size_t> indices;
+arma::mat products;
+
+// Run FastMKS.
+f.Search(5, indices, products);
+```
+
+### Running FastMKS on objects
+
+FastMKS is also useful for objects which are not representable in a metric
+space, such as strings, graphs, or models.  For these types of objects,
+questions based on distance don't really
+make sense.  One good example is with strings.  The question "how far is 'dog'
+from 'Taki Inoue'?" simply doesn't make sense.  We can't have a centroid of the
+terms 'Fritz', 'E28', and 'popsicle'.
+
+However, what we can do is define some sort of kernel on these objects.  These
+kernels generally correspond to some similarity measure, with one example being
+the p-spectrum string kernel (see `PSpectrumStringKernel`).  Using that, we can
+say "how similar is 'dog' to 'Taki Inoue'?" and get an actual numerical result
+by evaluating `K('dog', 'Taki Inoue')` (where `K` is our p-spectrum string
+kernel).
+
+The only requirement on these kernels is that they are positive definite kernels
+(or Mercer kernels).  For more information on those details, refer to the
+FastMKS paper.
+
+Remember that FastMKS is a tree-based method.  But trees like the binary space
+tree require centroids---and as we said earlier, centroids often don't make
+sense with these types of objects.  Therefore, we need a type of tree which is
+built *exclusively* on points in the dataset---those are points which we can
+evaluate our kernel function on.  The cover tree is one example of a type of
+tree satisfying this condition; its construction will only call the kernel
+function on two points that are in the dataset.
+
+But, we have one more problem.  The `CoverTree` class is built on `arma::mat`
+objects (dense matrices).  Our objects, however, are not necessarily
+representable in a column of a matrix.  To use the example we have been using,
+strings cannot be represented easily in a matrix because they may all have
+different lengths.
+
+The way to work around this problem is to create a "fake" data matrix which
+simply holds indices to objects.  A good example of how to do this is detailed
+in the documentation for the `PSpectrumStringKernel` class.
+
+In short, the trick is to make each data matrix one-dimensional, containing
+only linear indices:
+
+```c++
+arma::mat data = "0 1 2 3 4 5 6 7 8";
+```
+
+Then, when `Evaluate()` is called on the kernel function, the parameters will be
+two one-dimensional vectors that simply contain indices to objects.  The example
+below details the process a little better:
+
+```c++
+// This function evaluates the kernel on two Objects (in this example, its
+// implementation is not important; the only important thing is that the
+// function exists).
+double ObjectKernel::Evaluate(const Object& a, const Object& b) const;
+
+template<typename VecType>
+double ObjectKernel::Evaluate(const VecType& a, const VecType& b) const
+{
+  // Extract the indices from the vectors.
+  const size_t indexA = size_t(a[0]);
+  const size_t indexB = size_t(b[0]);
+
+  // Assume that 'objects' is an array (or std::vector or other container)
+  // holding Objects.
+  const Object& objectA = objects[indexA];
+  const Object& objectB = objects[indexB];
+
+  // Now call the function that does the actual evaluation on the objects and
+  // return its result.
+  return Evaluate(objectA, objectB);
+}
+```
+
+As mentioned earlier, the documentation for `PSpectrumStringKernel` is a good
+place to consult for further reference on this.  That kernel uses
+two-dimensional indices: one dimension represents the index of the string, and
+the other represents whether it refers to the query set or the reference set.
+If your kernel is meant to work on separate query and reference sets, that
+strategy should be considered.
+
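+Below is a minimal sketch tying the pieces together.  It assumes the
+hypothetical `ObjectKernel` class from the example above, along with the
+`objects` container it uses; since `ObjectKernel` carries state, an
+instantiated kernel is passed to the `FastMKS` constructor.
+
+```c++
+// The "fake" dataset: each column simply holds an index into the hypothetical
+// 'objects' container used by ObjectKernel.
+arma::mat data = "0 1 2 3 4 5 6 7 8";
+
+// Construct the kernel and run FastMKS with it.
+ObjectKernel kernel;
+FastMKS<ObjectKernel> f(data, kernel);
+
+// For each object, find the 3 objects with maximum kernel evaluation.
+arma::Mat<size_t> indices;
+arma::mat products;
+f.Search(3, indices, products);
+```
+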
+## Further documentation
+
+For further documentation on the FastMKS class, consult the documentation in the
+source code for FastMKS, in `mlpack/methods/fastmks/`.
diff -pruN 3.4.2-7/doc/tutorials/image/image.txt 4.0.1-1/doc/tutorials/image/image.txt
--- 3.4.2-7/doc/tutorials/image/image.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/image/image.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,188 +0,0 @@
-/*!
-@file image.txt
-@author Mehul Kumar Nirala
-@brief Tutorial for how to load and save images in mlpack.
-
-@page imagetutorial Image Utilities tutorial
-
-@section intro_imagetu Introduction
-
-Image datasets are becoming increasingly popular in deep learning.
-
-mlpack's image saving/loading functionality is based on [stb/](https://github.com/nothings/stb).
-
-@section toc_imagetu Table of Contents
-
-This tutorial is split into the following sections:
-
- - \ref intro_imagetu
- - \ref toc_imagetu
- - \ref model_api_imagetu
- - \ref imageinfo_api_imagetu
- - \ref load_api_imagetu
- - \ref save_api_imagetu
-
-@section model_api_imagetu Model API
-
-Image utilities supports loading and saving of images.
-
-It supports filetypes "jpg", "png", "tga","bmp", "psd", "gif", "hdr", "pic", "pnm" for loading and "jpg", "png", "tga", "bmp", "hdr" for saving.
-
-The datatype associated is unsigned char to support RGB values in the range 1-255. To feed data into the network typecast of `arma::Mat` may be required. Images are stored in matrix as (width * height * channels, NumberOfImages). Therefore imageMatrix.col(0) would be the first image if images are loaded in imageMatrix.
-
-@section imageinfo_api_imagetu ImageInfo
-
-ImageInfo class contains the metadata of the images.
-@code
-  /**
-   * Instantiate the ImageInfo object with the image width, height, channels.
-   *
-   * @param width Image width.
-   * @param height Image height.
-   * @param channels number of channels in the image.
-   */
-  ImageInfo(const size_t width,
-            const size_t height,
-            const size_t channels);
-@endcode
-Other public members include:
-  - quality Compression of the image if saved as jpg (0-100).
-
-@section load_api_imagetu Load
-
-
-Standalone loading of images.
-@code
-  /**
-   * Load the image file into the given matrix.
-   *
-   * @param filename Name of the image file.
-   * @param matrix Matrix to load the image into.
-   * @param info An object of ImageInfo class.
-   * @param fatal If an error should be reported as fatal (default false).
-   * @param transpose If true, flips the image, same as transposing the
-   *    matrix after loading.
-   * @return Boolean value indicating success or failure of load.
-   */
-   template<typename eT>
-   bool Load(const std::string& filename,
-             arma::Mat<eT>& matrix,
-             ImageInfo& info,
-             const bool fatal,
-             const bool transpose);
-@endcode
-
-Loading a test image. It also fills up the ImageInfo class object.
-@code
-data::ImageInfo info;
-data::Load("test_image.png", matrix, info, false, true);
-@endcode
-
-ImageInfo requires height, width, number of channels of the image.
-
-@code
-size_t height = 64, width = 64, channels = 1;
-data::ImageInfo info(width, height, channels);
-@endcode
-
-More than one image can be loaded into the same matrix.
-
-Loading multiple images:
-
-@code
-  /**
-   * Load the image file into the given matrix.
-   *
-   * @param files A vector consisting of filenames.
-   * @param matrix Matrix to save the image from.
-   * @param info An object of ImageInfo class.
-   * @param fatal If an error should be reported as fatal (default false).
-   * @param transpose If true, flips the image, same as transposing the
-   *    matrix after loading.
-   * @return Boolean value indicating success or failure of load.
-   */
-   template<typename eT>
-   bool Load(const std::vector<std::string>& files,
-             arma::Mat<eT>& matrix,
-             ImageInfo& info,
-             const bool fatal,
-             const bool transpose);
-@endcode
-
-@code
-  data::ImageInfo info;
-  std::vector<std::string> files = {"test_image1.bmp", "test_image2.bmp"};
-  data::Load(files, matrix, info, false, true);
-@endcode
-
-@section save_api_imagetu Save
-
-Save images expects a matrix of type unsigned char in the form (width * height * channels, NumberOfImages).
-Just like load it can be used to save one image or multiple images. Besides image data it also expects the shape of the image as input (width, height, channels).
-
-Saving one image:
-
-@code
-  /**
-   * Save the image file from the given matrix.
-   *
-   * @param filename Name of the image file.
-   * @param matrix Matrix to save the image from.
-   * @param info An object of ImageInfo class.
-   * @param fatal If an error should be reported as fatal (default false).
-   * @param transpose If true, flips the image, same as transposing the
-   *    matrix after loading.
-   * @return Boolean value indicating success or failure of load.
-   */
-   template<typename eT>
-   bool Save(const std::string& filename,
-             arma::Mat<eT>& matrix,
-             ImageInfo& info,
-             const bool fatal,
-             const bool transpose);
-@endcode
-
-@code
-  data::ImageInfo info;
-  info.width = info.height = 25;
-  info.channels = 3;
-  info.quality = 90;
-  data::Save("test_image.bmp", matrix, info, false, true);
-@endcode
-
-If the matrix contains more than one image, only the first one is saved.
-
-Saving multiple images:
-
-@code
-  /**
-   * Save the image file from the given matrix.
-   *
-   * @param files A vector consisting of filenames.
-   * @param matrix Matrix to save the image from.
-   * @param info An object of ImageInfo class.
-   * @param fatal If an error should be reported as fatal (default false).
-   * @param transpose If true, Flips the image, same as transposing the
-   *    matrix after loading.
-   * @return Boolean value indicating success or failure of load.
-   */
-   template<typename eT>
-   bool Save(const std::vector<std::string>& files,
-             arma::Mat<eT>& matrix,
-             ImageInfo& info,
-             const bool fatal,
-             const bool transpose);
-@endcode
-
-@code
-  data::ImageInfo info;
-  info.width = info.height = 25;
-  info.channels = 3;
-  info.quality = 90;
-  std::vector<std::string> files = {"test_image1.bmp", "test_image2.bmp"};
-  data::Save(files, matrix, info, false, true);
-@endcode
-
-Multiple images are saved according to the vector of filenames specified.
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/image.md 4.0.1-1/doc/tutorials/image.md
--- 3.4.2-7/doc/tutorials/image.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/image.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,179 @@
+# Image Utilities Tutorial
+
+Image datasets are becoming increasingly popular in deep learning.
+
+mlpack's image saving/loading functionality is based on [stb](https://github.com/nothings/stb).
+
+## Model API
+
+The image utilities support loading and saving of images.
+
+It supports filetypes `jpg`, `png`, `tga`, `bmp`, `psd`, `gif`, `hdr`, `pic`,
+`pnm` for loading and `jpg`, `png`, `tga`, `bmp`, `hdr` for saving.
+
+The associated datatype is `unsigned char`, supporting RGB values in the range
+0-255.  To feed the data into a network, a typecast to another `arma::Mat` type
+may be required.  Images are stored in the matrix as
+`(width * height * channels, numberOfImages)`; therefore `imageMatrix.col(0)`
+would be the first image if the images are loaded into `imageMatrix`.
+
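+As a quick illustration of this layout (a sketch, assuming `images` has been
+filled by `data::Load()` with one flattened image per column):
+
+```c++
+arma::Mat<unsigned char> images;
+// ... fill 'images' with data::Load() here ...
+
+// Column 0 is the first image; typecast to arma::mat (and rescale) before
+// feeding it into a network.
+arma::mat first = arma::conv_to<arma::mat>::from(images.col(0)) / 255.0;
+```
+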
+## `ImageInfo`
+
+The `ImageInfo` class contains the metadata of the images.
+
+```c++
+/**
+ * Instantiate the ImageInfo object with the image width, height, channels.
+ *
+ * @param width Image width.
+ * @param height Image height.
+ * @param channels number of channels in the image.
+ */
+ImageInfo(const size_t width,
+          const size_t height,
+          const size_t channels);
+```
+
+Other public members include `quality`, the compression level used if the image
+is saved as `jpg` (0-100).
+
+## Loading
+
+Standalone loading of images can be done with the function below.
+
+```c++
+/**
+ * Load the image file into the given matrix.
+ *
+ * @param filename Name of the image file.
+ * @param matrix Matrix to load the image into.
+ * @param info An object of ImageInfo class.
+ * @param fatal If an error should be reported as fatal (default false).
+ * @param transpose If true, flips the image, same as transposing the
+ *    matrix after loading.
+ * @return Boolean value indicating success or failure of load.
+ */
+template<typename eT>
+bool Load(const std::string& filename,
+          arma::Mat<eT>& matrix,
+          ImageInfo& info,
+          const bool fatal,
+          const bool transpose);
+```
+
+Loading a test image is shown below; the call also fills in the `ImageInfo`
+object.
+
+```c++
+data::ImageInfo info;
+data::Load("test_image.png", matrix, info, false, true);
+```
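+
+Since `Load()` returns a `bool`, success can be checked directly; a short
+sketch using the names from the example above:
+
+```c++
+if (!data::Load("test_image.png", matrix, info, false, true))
+{
+  std::cerr << "Loading test_image.png failed!" << std::endl;
+}
+```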
+
+`ImageInfo` requires the height, width, and number of channels of the image.
+
+```c++
+size_t height = 64, width = 64, channels = 1;
+data::ImageInfo info(width, height, channels);
+```
+
+More than one image can be loaded into the same matrix.
+
+Loading multiple images can be done using the function below.
+
+```c++
+/**
+ * Load the image files into the given matrix.
+ *
+ * @param files A vector consisting of filenames.
+ * @param matrix Matrix to load the images into.
+ * @param info An object of ImageInfo class.
+ * @param fatal If an error should be reported as fatal (default false).
+ * @param transpose If true, flips the image, same as transposing the
+ *    matrix after loading.
+ * @return Boolean value indicating success or failure of load.
+ */
+template<typename eT>
+bool Load(const std::vector<std::string>& files,
+          arma::Mat<eT>& matrix,
+          ImageInfo& info,
+          const bool fatal,
+          const bool transpose);
+```
+
+```c++
+data::ImageInfo info;
+std::vector<std::string> files = {"test_image1.bmp", "test_image2.bmp"};
+data::Load(files, matrix, info, false, true);
+```
+
+## Saving
+
+Saving images expects a matrix of type `unsigned char` in the form
+`(width * height * channels, numberOfImages)`.  Just like loading, it can be
+used to save one image or multiple images.  Besides the image data, it also
+expects the shape of the image as input: `(width, height, channels)`.
+
+Saving one image can be done with the function below:
+
+```c++
+/**
+ * Save the image file from the given matrix.
+ *
+ * @param filename Name of the image file.
+ * @param matrix Matrix to save the image from.
+ * @param info An object of ImageInfo class.
+ * @param fatal If an error should be reported as fatal (default false).
+ * @param transpose If true, flips the image, same as transposing the
+ *    matrix after loading.
+ * @return Boolean value indicating success or failure of save.
+ */
+template<typename eT>
+bool Save(const std::string& filename,
+          arma::Mat<eT>& matrix,
+          ImageInfo& info,
+          const bool fatal,
+          const bool transpose);
+```
+
+```c++
+data::ImageInfo info;
+info.width = info.height = 25;
+info.channels = 3;
+info.quality = 90;
+data::Save("test_image.bmp", matrix, info, false, true);
+```
+
+If the matrix contains more than one image, only the first one is saved.
+
+Saving multiple images can be done with the function below.
+
+```c++
+/**
+ * Save the image files from the given matrix.
+ *
+ * @param files A vector consisting of filenames.
+ * @param matrix Matrix to save the image from.
+ * @param info An object of ImageInfo class.
+ * @param fatal If an error should be reported as fatal (default false).
+ * @param transpose If true, flips the image, same as transposing the
+ *    matrix after loading.
+ * @return Boolean value indicating success or failure of save.
+ */
+template<typename eT>
+bool Save(const std::vector<std::string>& files,
+          arma::Mat<eT>& matrix,
+          ImageInfo& info,
+          const bool fatal,
+          const bool transpose);
+```
+
+```c++
+data::ImageInfo info;
+info.width = info.height = 25;
+info.channels = 3;
+info.quality = 90;
+std::vector<std::string> files = {"test_image1.bmp", "test_image2.bmp"};
+data::Save(files, matrix, info, false, true);
+```
+
+Multiple images are saved according to the vector of filenames specified.
diff -pruN 3.4.2-7/doc/tutorials/kmeans/kmeans.txt 4.0.1-1/doc/tutorials/kmeans/kmeans.txt
--- 3.4.2-7/doc/tutorials/kmeans/kmeans.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/kmeans/kmeans.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,698 +0,0 @@
-/*!
-
-@file kmeans.txt
-@author Ryan Curtin
-@brief Tutorial for how to use k-means in mlpack.
-
-@page kmtutorial K-Means tutorial (kmeans)
-
-@section intro_kmtut Introduction
-
-The popular k-means algorithm for clustering has been around since the late
-1950s, and the standard algorithm was proposed by Stuart Lloyd in 1957.  Given a
-set of points \f$ X \f$, k-means clustering aims to partition each point \f$ x_i
-\f$ into a cluster \f$ c_j \f$ (where \f$ j \le k \f$ and \f$ k \f$, the number
-of clusters, is a parameter).  The partitioning is done to minimize the
-objective function
-
-\f[
-\sum_{j = 1}^{k} \sum_{x_i \in c_j} \| x_i - \mu_j \|^2
-\f]
-
-where \f$\mu_j\f$ is the centroid of cluster \f$c_j\f$.  The standard algorithm
-is a two-step algorithm:
-
- - \b Assignment \b step.  Each point \f$x_i\f$ in \f$X\f$ is assigned to the
-   cluster whose centroid it is closest to.
-
- - \b Update \b step.  Using the new cluster assignments, the centroids of each
-   cluster are recalculated.
-
-The algorithm has converged when no more assignment changes are happening with
-each iteration.  However, this algorithm can get stuck in local minima of the
-objective function and is particularly sensitive to the initial cluster
-assignments.  Also, situations can arise where the algorithm will never converge
-but reaches steady state -- for instance, one point may be changing between two
-cluster assignments.
-
-There is vast literature on the k-means algorithm and its uses, as well as
-strategies for choosing initial points effectively and keeping the algorithm
-from converging in local minima.  \b mlpack does implement some of these,
-notably the Bradley-Fayyad algorithm (see the reference below) for choosing
-refined initial points.  Importantly, the C++ \c KMeans class makes it very easy
-to improve the k-means algorithm in a modular way.
-
-@code
-@inproceedings{bradley1998refining,
-  title={Refining initial points for k-means clustering},
-  author={Bradley, Paul S. and Fayyad, Usama M.},
-  booktitle={Proceedings of the Fifteenth International Conference on Machine
-      Learning (ICML 1998)},
-  volume={66},
-  year={1998}
-}
-@endcode
-
-\b mlpack provides:
-
- - a \ref cli_kmtut "simple command-line executable" to run k-means
- - a \ref kmeans_kmtut "simple C++ interface" to run k-means
- - a \ref kmeans_template_kmtut "generic, extensible, and powerful C++ class"
-   for complex usage
-
-@section toc_kmtut Table of Contents
-
-A list of all the sections this tutorial contains.
-
- - \ref intro_kmtut
- - \ref toc_kmtut
- - \ref cli_kmtut
-   - \ref cli_ex1_kmtut
-   - \ref cli_ex2_kmtut
-   - \ref cli_ex3_kmtut
-   - \ref cli_ex4_kmtut
-   - \ref cli_ex6_kmtut
-   - \ref cli_ex7_kmtut
- - \ref kmeans_kmtut
-   - \ref kmeans_ex1_kmtut
-   - \ref kmeans_ex2_kmtut
-   - \ref kmeans_ex3_kmtut
-   - \ref kmeans_ex5_kmtut
-   - \ref kmeans_ex6_kmtut
-   - \ref kmeans_ex7_kmtut
- - \ref kmeans_template_kmtut
-   - \ref kmeans_metric_kmtut
-   - \ref kmeans_initial_partition_kmtut
-   - \ref kmeans_empty_cluster_kmtut
-   - \ref kmeans_lloyd_kmtut
- - \ref further_doc_kmtut
-
-@section cli_kmtut Command-Line 'kmeans'
-
-\b mlpack provides a command-line executable, \c mlpack_kmeans, to allow easy
-execution of the k-means algorithm on data.  Complete documentation of the
-executable can be found by typing
-
-@code
-$ mlpack_kmeans --help
-@endcode
-
-As of October 2014, support for overclustering has been removed due to bugs and
-lack of usage.  If this is support you were using, or are interested, please
-file a bug or get in touch with the \b mlpack developers in some way so that the
-support can be re-implemented.
-
-Below are several examples demonstrating simple use of the \c mlpack_kmeans
-executable.
-
-@subsection cli_ex1_kmtut Simple k-means clustering
-
-We want to find 5 clusters using the points in the file dataset.csv.  By
-default, if any of the clusters end up empty, that cluster will be reinitialized
-to contain the point furthest from the cluster with maximum variance.  The
-cluster assignments of each point will be stored in assignments.csv.  Each row
-in assignments.csv will correspond to the row in dataset.csv.
-
-@code
-$ mlpack_kmeans -c 5 -i dataset.csv -v -o assignments.csv
-@endcode
-
-@subsection cli_ex2_kmtut Saving the resulting centroids
-
-Sometimes it is useful to save the centroids of the clusters found by k-means;
-one example might be for plotting the points.  The \c -C (\c --centroid_file)
-option allows specification of a file into which the centroids will be saved
-(one centroid per line, if it is a CSV or other text format).
-
-@code
-$ mlpack_kmeans -c 5 -i dataset.csv -v -o assignments.csv -C centroids.csv
-@endcode
-
-@subsection cli_ex3_kmtut Allowing empty clusters
-
-If you would like to allow empty clusters to exist, instead of reinitializing
-them, simply specify the \c -e (\c --allow_empty_clusters) option.  Note that
-when you save your clusters, even empty clusters will still have centroids.
-The centroids of the empty cluster will be the same as what they were on the
-last iteration when the cluster was not empty.
-
-@code
-$ mlpack_kmeans -c 5 -i dataset.csv -v -e -o assignments.csv -C centroids.csv
-@endcode
-
-@subsection cli_ex3a_kmtut Killing empty clusters
-
-If you would like to kill empty clusters, instead of reinitializing
-them, simply specify the \c -E (\c --kill_empty_clusters) option.  Note that
-when you save your clusters, all the empty clusters will be removed and the
-final result may contain fewer than the specified number of clusters.
-
-@code
-$ mlpack_kmeans -c 5 -i dataset.csv -v -E -o assignments.csv -C centroids.csv
-@endcode
-
-@subsection cli_ex4_kmtut Limiting the maximum number of iterations
-
-As mentioned earlier, the k-means algorithm can often fail to converge.  In such
-a situation, it may be useful to stop the algorithm by way of limiting the
-maximum number of iterations.  This can be done with the \c -m (\c
---max_iterations) parameter, which is set to 1000 by default.  If the maximum
-number of iterations is 0, the algorithm will run until convergence -- or
-potentially forever.  The example below sets a maximum of 250 iterations.
-
-@code
-$ mlpack_kmeans -c 5 -i dataset.csv -v -o assignments.csv -m 250
-@endcode
-
-@subsection cli_ex6_kmtut Using Bradley-Fayyad "refined start"
-
-The method proposed by Bradley and Fayyad in their paper "Refining initial
-points for k-means clustering" is implemented in \b mlpack.  This strategy
-samples points from the dataset and runs k-means clustering on those points
-multiple times, saving the resulting clusters.  Then, k-means clustering is run
-on those clusters, yielding the original number of clusters.  The centroids of
-those resulting clusters are used as initial centroids for k-means clustering on
-the entire dataset.
-
-This technique generally gives better initial points than the default random
-partitioning, but depending on the parameters, it can take much longer.  This
-initialization technique is enabled with the \c -r (\c --refined_start) option.
-The \c -S (\c --samplings) parameter controls how many samplings of the dataset
-are performed, and the \c -p (\c --percentage) parameter controls how much of
-the dataset is randomly sampled for each sampling (it must be between 0.0 and
-1.0).  For more information on the refined start technique, see the paper
-referenced in the introduction of this tutorial.
-
-The example below performs k-means clustering, giving 5 clusters, using the
-refined start technique, sampling 10% of the dataset 25 times to produce the
-initial centroids.
-
-@code
-$ mlpack_kmeans -c 5 -i dataset.csv -v -o assignments.csv -r -S 25 -p 0.2
-@endcode
-
-@subsection cli_ex7_kmtut Using different k-means algorithms
-
-The \c mlpack_kmeans program implements six different strategies for
-clustering; each of these gives the exact same results, but will have different
-runtimes.  The particular algorithm to use can be specified with the \c -a or
-\c --algorithm option.  The choices are:
-
- - \c naive: the standard Lloyd iteration; takes \f$O(kN)\f$ time per iteration.
- - \c pelleg-moore: the 'blacklist' algorithm, which builds a kd-tree on the
-   data.  This can be fast when k is small and the dimensionality is reasonably
-   low.
- - \c elkan: Elkan's algorithm for k-means, which maintains upper and lower
-   distance bounds between each point and each centroid.  This can be very fast,
-   but it does not scale well to the case of large N or k, and uses a lot of
-   memory.
- - \c hamerly: Hamerly's algorithm is a variant of Elkan's algorithm that
-   handles memory usage much better and thus can operate with much larger
-   datasets than Elkan's algorithm.
- - \c dualtree: The dual-tree algorithm for k-means builds a kd-tree on both the
-   centroids and the points in order to prune away as much work as possible.
-   This algorithm is most effective when both N and k are large.
- - \c dualtree-covertree: This is the dual-tree algorithm using cover trees
-   instead of kd-trees.  It satisfies the runtime guarantees specified in the
-   dual-tree k-means paper.
-
-In general, the \c naive algorithm will be much slower than the others on
-datasets that are larger than tiny.
-
-The example below uses the \c dualtree algorithm to perform k-means clustering
-with 5 clusters on the dataset in \c dataset.csv, using the initial centroids in
-\c initial_centroids.csv, saving the resulting cluster assignments to
-\c assignments.csv:
-
-@code
-$ mlpack_kmeans -i dataset.csv -c 5 -v -I initial_centroids.csv -a dualtree \
-> -o assignments.csv
-@endcode
-
-@section kmeans_kmtut The 'KMeans' class
-
-The \c KMeans<> class (with default template parameters) provides a simple way
-to run k-means clustering using \b mlpack in C++.  The default template
-parameters for \c KMeans<> will initialize cluster assignments randomly and
-disallow empty clusters.  When an empty cluster is encountered, the point
-furthest from the cluster with maximum variance is set to the centroid of the
-empty cluster.
-
-@subsection kmeans_ex1_kmtut Running k-means and getting cluster assignments
-
-The simplest way to use the \c KMeans<> class is to pass in a dataset and a
-number of clusters, and receive the cluster assignments in return.  Note that
-the dataset must be column-major -- that is, one column corresponds to one
-point.  See \ref matrices "the matrices guide" for more information.
-
-@code
-#include <mlpack/methods/kmeans/kmeans.hpp>
-
-using namespace mlpack::kmeans;
-
-// The dataset we are clustering.
-extern arma::mat data;
-// The number of clusters we are getting.
-extern size_t clusters;
-
-// The assignments will be stored in this vector.
-arma::Row<size_t> assignments;
-
-// Initialize with the default arguments.
-KMeans<> k;
-k.Cluster(data, clusters, assignments);
-@endcode
-
-Now, the vector \c assignments holds the cluster assignments of each point in
-the dataset.
-
-@subsection kmeans_ex2_kmtut Running k-means and getting centroids of clusters
-
-Often it is useful to not only have the cluster assignments, but the centroids
-of each cluster.  Another overload of \c Cluster() makes this easily possible:
-
-@code
-#include <mlpack/methods/kmeans/kmeans.hpp>
-
-using namespace mlpack::kmeans;
-
-// The dataset we are clustering.
-extern arma::mat data;
-// The number of clusters we are getting.
-extern size_t clusters;
-
-// The assignments will be stored in this vector.
-arma::Row<size_t> assignments;
-// The centroids will be stored in this matrix.
-arma::mat centroids;
-
-// Initialize with the default arguments.
-KMeans<> k;
-k.Cluster(data, clusters, assignments, centroids);
-@endcode
-
-Note that the centroids matrix has columns equal to the number of clusters and
-rows equal to the dimensionality of the dataset.  Each column represents the
-centroid of the according cluster -- \c centroids.col(0) represents the
-centroid of the first cluster.
-
-@subsection kmeans_ex3_kmtut Limiting the maximum number of iterations
-
-The first argument to the constructor allows specification of the maximum number
-of iterations.  This is useful because often, the k-means algorithm does not
-converge, and is terminated after a number of iterations.  Setting this
-parameter to 0 indicates that the algorithm will run until convergence -- note
-that in some cases, convergence may never happen.  The default maximum number of
-iterations is 1000.
-
-@code
-// The first argument is the maximum number of iterations.  Here we set it to
-// 500 iterations.
-KMeans<> k(500);
-@endcode
-
-Then you can run \c Cluster() as normal.
-
-@subsection kmeans_ex5_kmtut Setting initial cluster assignments
-
-If you have an initial guess for the cluster assignments for each point, you can
-fill the assignments vector with the guess and then pass an extra boolean
-(initialAssignmentGuess) as true to the \c Cluster() method.  Below are examples
-for either overload of \c Cluster().
-
-@code
-#include <mlpack/methods/kmeans/kmeans.hpp>
-
-using namespace mlpack::kmeans;
-
-// The dataset we are clustering on.
-extern arma::mat dataset;
-// The number of clusters we are obtaining.
-extern size_t clusters;
-
-// A vector pre-filled with initial assignment guesses.
-extern arma::Row<size_t> assignments;
-
-KMeans<> k;
-
-// The boolean set to true indicates that our assignments vector is filled with
-// initial guesses.
-k.Cluster(dataset, clusters, assignments, true);
-@endcode
-
-@code
-#include <mlpack/methods/kmeans/kmeans.hpp>
-
-using namespace mlpack::kmeans;
-
-// The dataset we are clustering on.
-extern arma::mat dataset;
-// The number of clusters we are obtaining.
-extern size_t clusters;
-
-// A vector pre-filled with initial assignment guesses.
-extern arma::Row<size_t> assignments;
-
-// This will hold the centroids of the finished clusters.
-arma::mat centroids;
-
-KMeans<> k;
-
-// The boolean set to true indicates that our assignments vector is filled with
-// initial guesses.
-k.Cluster(dataset, clusters, assignments, centroids, true);
-@endcode
-
-@note
-If you have a heuristic or algorithm which makes initial guesses, a
-more elegant solution is to create a new class fulfilling the
-InitialPartitionPolicy template policy.  See \ref kmeans_initial_partition_kmtut
-"the section about changing the initial partitioning strategy" for more details.
-
-@par
-
-@note
-If you set the InitialPartitionPolicy parameter to something other than the
-default but give an initial cluster assignment guess, the InitialPartitionPolicy
-will not be used to initialize the algorithm.  See \ref kmeans_initial_partition_kmtut
-"the section about changing the initial partitioning strategy"
-for more details.
-
-@subsection kmeans_ex6_kmtut Setting initial cluster centroids
-
-An equally important option to being able to make initial cluster assignment
-guesses is to make initial cluster centroid guesses without having to assign
-each point in the dataset to an initial cluster.  This is similar to the
-previous section, but now you must pass two extra booleans -- the first
-(initialAssignmentGuess) as false, indicating that there are not initial cluster
-assignment guesses, and the second (initialCentroidGuess) as true, indicating
-that the centroids matrix is filled with initial centroid guesses.
-
-This, of course, only works with the overload of \c Cluster() that takes a
-matrix to put the resulting centroids in.  Below is an example.
-
-@code
-#include <mlpack/methods/kmeans/kmeans.hpp>
-
-using namespace mlpack::kmeans;
-
-// The dataset we are clustering on.
-extern arma::mat dataset;
-// The number of clusters we are obtaining.
-extern size_t clusters;
-
-// A matrix pre-filled with guesses for the initial cluster centroids.
-extern arma::mat centroids;
-
-// This will be filled with the final cluster assignments for each point.
-arma::Row<size_t> assignments;
-
-KMeans<> k;
-
-// Remember, the first boolean indicates that we are not giving initial
-// assignment guesses, and the second boolean indicates that we are giving
-// initial centroid guesses.
-k.Cluster(dataset, clusters, assignments, centroids, false, true);
-@endcode
-
-@note
-If you have a heuristic or algorithm which makes initial guesses, a
-more elegant solution is to create a new class fulfilling the
-InitialPartitionPolicy template policy.  See \ref kmeans_initial_partition_kmtut
-"the section about changing the initial partitioning strategy" for more details.
-
-@par
-
-@note
-If you set the InitialPartitionPolicy parameter to something other than the
-default but give an initial cluster centroid guess, the InitialPartitionPolicy
-will not be used to initialize the algorithm.  See \ref kmeans_initial_partition_kmtut
-"the section about changing the initial partitioning strategy" for more details.
-
-@subsection kmeans_ex7_kmtut Running sparse k-means
-
-The \c Cluster() function can work on both sparse and dense matrices, so all of
-the above examples can be used with sparse matrices instead, if the fifth
-template parameter is modified.  Below is a simple example.  Note that the
-centroids are returned as a dense matrix, because the centroids of collections
-of sparse points are not generally sparse.
-
-@code
-// The sparse dataset.
-extern arma::sp_mat sparseDataset;
-// The number of clusters.
-extern size_t clusters;
-
-// The assignments will be stored in this vector.
-arma::Row<size_t> assignments;
-// The centroids of each cluster will be stored in this sparse matrix.
-arma::sp_mat sparseCentroids;
-
-// We must change the fifth (and last) template parameter.
-KMeans<metric::EuclideanDistance, SampleInitialization, MaxVarianceNewCluster,
-       NaiveKMeans, arma::sp_mat> k;
-k.Cluster(sparseDataset, clusters, assignments, sparseCentroids);
-@endcode
-
-@section kmeans_template_kmtut Template parameters for the 'KMeans' class
-
-The \c KMeans<> class also takes three template parameters, which can be
-modified to change the behavior of the k-means algorithm.  There are three
-template parameters:
-
- - \c MetricType: controls the distance metric used for clustering (by
-   default, the squared Euclidean distance is used)
- - \c InitialPartitionPolicy: the method by which initial clusters are set; by
-   default, \ref mlpack::kmeans::SampleInitialization "SampleInitialization" is
-   used
- - \c EmptyClusterPolicy: the action taken when an empty cluster is encountered;
-   by default, \ref mlpack::kmeans::MaxVarianceNewCluster "MaxVarianceNewCluster"
-   is used
- - \c LloydStepType: this defines the strategy used to make a single Lloyd
-   iteration; by default this is the typical Lloyd iteration specified in
-   \ref mlpack::kmeans::NaiveKMeans "NaiveKMeans"
- - \c MatType: type of data matrix to use for clustering
-
-The class is defined like below:
-
-@code
-template<
-  typename DistanceMetric = mlpack::metric::SquaredEuclideanDistance,
-  typename InitialPartitionPolicy = SampleInitialization,
-  typename EmptyClusterPolicy = MaxVarianceNewCluster,
-  template<class, class> class LloydStepType = NaiveKMeans,
-  typename MatType = arma::mat
->
-class KMeans;
-@endcode
-
-In the following sections, each policy is described further, with examples of
-how to modify them.
-
-@subsection kmeans_metric_kmtut Changing the distance metric used for k-means
-
-Most machine learning algorithms in \b mlpack support modifying the distance
-metric, and \c KMeans<> is no exception.  Similar to \ref
-mlpack::neighbor::NeighborSearch "NeighborSearch" (see \ref
-metric_type_doc_nstut "the section in the NeighborSearch tutorial"), any class
-in mlpack::metric can be given as an argument.  The mlpack::metric::LMetric
-class is a good example implementation.
-
-A class fulfilling the MetricType policy must provide the following two
-functions:
-
-@code
-// Empty constructor is required.
-MetricType();
-
-// Compute the distance between two points.
-template<typename VecType>
-double Evaluate(const VecType& a, const VecType& b);
-@endcode
-
-Most of the standard metrics that could be used are stateless and therefore the
-\c Evaluate() method is implemented statically.  However, there are metrics,
-such as the Mahalanobis distance (mlpack::metric::MahalanobisDistance), that
-store state.  To this end, an instantiated MetricType object is stored within the
-\c KMeans class.  The example below shows how to pass an instantiated
-MahalanobisDistance in the constructor.
-
-@code
-// The initialized Mahalanobis distance.
-extern mlpack::metric::MahalanobisDistance distance;
-
-// We keep the default arguments for the maximum number of iterations, but pass
-// our instantiated metric.
-KMeans<mlpack::metric::MahalanobisDistance> k(1000, distance);
-@endcode
-
-@note
-While the MetricType policy only requires two methods, one of which is an empty
-constructor, more can always be added.  mlpack::metric::MahalanobisDistance also
-has constructors with parameters, because it is a stateful metric.
-
-@subsection kmeans_initial_partition_kmtut Changing the initial partitioning strategy used for k-means
-
-There have been many initial cluster strategies for k-means proposed in the
-literature.  Fortunately, the \c KMeans<> class makes it very easy to implement
-one of these methods and plug it in without needing to modify the existing
-algorithm code at all.
-
-By default, the \c KMeans<> class uses mlpack::kmeans::SampleInitialization,
-which randomly samples points as initial centroids.  However, writing a new
-policy is simple; it needs to only implement the following functions:
-
-@code
-// Empty constructor is required.
-InitialPartitionPolicy();
-
-// Only *one* of the following two functions is required!  You should implement
-// whichever you find more convenient to implement.
-
-// This function is called to initialize the clusters and returns centroids.
-template<typename MatType>
-void Cluster(MatType& data,
-             const size_t clusters,
-             arma::mat& centroids);
-
-// This function is called to initialize the clusters and returns individual
-// point assignments.  The centroids will then be calculated from the given
-// assignments.
-template<typename MatType>
-void Cluster(MatType& data,
-             const size_t clusters,
-             arma::Row<size_t> assignments);
-@endcode
-
-The templatization of the \c Cluster() function allows both dense and sparse
-matrices to be passed in.  If the desired policy does not work with sparse (or
-dense) matrices, then the method can be written specifically for one type of
-matrix -- however, be warned that if you try to use \c KMeans with that policy
-and the wrong type of matrix, you will get many ugly compilation errors!
-
-@code
-// The Cluster() function specialized for dense matrices.
-void Cluster(arma::mat& data,
-             const size_t clusters,
-             arma::Row<size_t> assignments);
-@endcode
-
-Note that only one of the two possible \c Cluster() functions are required.
-This is because sometimes it is easier to express an initial partitioning policy
-as something that returns point assignments, and sometimes it is easier to
-express the policy as something that returns centroids.  The KMeans<> class will
-use whichever of these two functions is given; if both are given, the overload
-that returns centroids will be preferred.
-
-One alternate to the default SampleInitialization policy is the RefinedStart
-policy, which is an implementation of the Bradley and Fayyad approach for
-finding initial points detailed in "Refined initial points for k-means
-clustering" and other places in this document.  Another option is the
-RandomPartition class, which randomly assigns points to clusters, but this may
-not work very well for most settings.  See the documentation for
-mlpack::kmeans::RefinedStart and mlpack::kmeans::RandomPartition for more
-information.
-
-If the \c Cluster() method returns point assignments instead of centroids, then
-valid initial assignments must be returned for every point in the dataset.
-
-As with the MetricType template parameter, an initialized InitialPartitionPolicy
-can be passed to the constructor of \c KMeans as a fourth argument.
-
-@subsection kmeans_empty_cluster_kmtut Changing the action taken when an empty cluster is encountered
-
-Sometimes, during clustering, a situation will arise where a cluster has no
-points in it.  The \c KMeans class allows easy customization of the action to be
-taken when this occurs.  By default, the point furthest from the centroid of the
-cluster with maximum variance is taken as the centroid of the empty cluster;
-this is implemented in the mlpack::kmeans::MaxVarianceNewCluster class.  Another
-alternate choice is the mlpack::kmeans::AllowEmptyClusters class, which simply
-allows empty clusters to persist.
-
-A custom policy can be written and it must implement the following methods:
-
-@code
-// Empty constructor is required.
-EmptyClusterPolicy();
-
-// This function is called when an empty cluster is encountered.  emptyCluster
-// indicates the cluster which is empty, and then the clusterCounts and
-// assignments are meant to be modified by the function.  The function should
-// return the number of modified points.
-template<typename MatType>
-size_t EmptyCluster(const MatType& data,
-                    const size_t emptyCluster,
-                    const MatType& centroids,
-                    arma::Col<size_t>& clusterCounts,
-                    arma::Row<size_t>& assignments);
-@endcode
-
-The \c EmptyCluster() function is called for each cluster that is empty at each
-iteration of the algorithm.  As with InitialPartitionPolicy, the \c
-EmptyCluster() function does not need to be generalized to support both dense
-and sparse matrices -- but usage with the wrong type of matrix will cause
-compilation errors.
-
-Like the other template parameters to \c KMeans, EmptyClusterPolicy
-implementations that have state can be passed to the constructor of \c KMeans as
-a fifth argument.  See the kmeans::KMeans documentation for further details.
-
-@subsection kmeans_lloyd_kmtut The LloydStepType template parameter
-
-The internal algorithm used for a single step of the k-means algorithm can
-easily be changed; \b mlpack implements several existing classes that satisfy
-the \c LloydStepType policy:
-
- - mlpack::kmeans::NaiveKMeans
- - mlpack::kmeans::ElkanKMeans
- - mlpack::kmeans::HamerlyKMeans
- - mlpack::kmeans::PellegMooreKMeans
- - mlpack::kmeans::DualTreeKMeans
-
-Note that the \c LloydStepType policy is itself a template template parameter,
-and must accept two template parameters of its own:
-
- - \c MetricType: the type of metric to use
- - \c MatType: the type of data matrix to use
-
-The \c LloydStepType policy also mandates three functions:
-
- - a constructor: <tt>LloydStepType(const MatType& dataset, MetricType&
-   metric);</tt>
- - an \c Iterate() function:
-
-@code
-/**
- * Run a single iteration of the Lloyd algorithm, updating the given centroids
- * into the newCentroids matrix.  If any cluster is empty (that is, if any
- * cluster has no points assigned to it), then the centroid associated with
- * that cluster may be filled with invalid data (it will be corrected later).
- *
- * @param centroids Current cluster centroids.
- * @param newCentroids New cluster centroids.
- * @param counts Number of points in each cluster at the end of the iteration.
- */
-double Iterate(const arma::mat& centroids,
-               arma::mat& newCentroids,
-               arma::Col<size_t>& counts);
-@endcode
-
- - a function to get the number of distance calculations:
-
-@code
-size_t DistanceCalculations() const { return distanceCalculations; }
-@endcode
-
-Note that \c Iterate() does not need to return valid centroids if the cluster is
-empty.  This is because \c EmptyClusterPolicy will handle the empty centroid.
-This behavior can be used to avoid small amounts of computation.
-
-For examples, see the five aforementioned implementations of classes that
-satisfy the \c LloydStepType policy.
-
-@section further_doc_kmtut Further documentation
-
-For further documentation on the KMeans class, consult the \ref
-mlpack::kmeans::KMeans "complete API documentation".
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/kmeans.md 4.0.1-1/doc/tutorials/kmeans.md
--- 3.4.2-7/doc/tutorials/kmeans.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/kmeans.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,645 @@
+# K-Means Tutorial
+
+The popular k-means algorithm for clustering has been around since the late
+1950s, and the standard algorithm was proposed by Stuart Lloyd in 1957.  Given a
+set of points `X`, k-means clustering aims to partition each point `x_i` into a
+cluster `c_j` (where `j <= k` and `k`, the number of clusters, is a parameter).
+The partitioning is done to minimize the objective function
+
+```
+sum_{j = 1}^{k} sum_{x_i in c_j} || x_i - m_j ||^2
+```
+
+where `m_j` is the centroid of cluster `c_j`.  The standard algorithm
+is a two-step algorithm:
+
+ - *Assignment* step.  Each point `x_i` in `X` is assigned to the cluster whose
+   centroid it is closest to.
+
+ - *Update* step.  Using the new cluster assignments, the centroids of each
+   cluster are recalculated.
+
+The algorithm has converged when the assignments no longer change between
+iterations.  However, this algorithm can get stuck in local minima of the
+objective function and is particularly sensitive to the initial cluster
+assignments.  Also, situations can arise where the algorithm will never
+converge but instead reaches a steady state---for instance, one point may
+oscillate between two cluster assignments.
+
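+The sketch below illustrates one iteration of the standard algorithm directly
+in Armadillo; it is only a demonstration of the two steps above, not mlpack's
+implementation.
+
+```c++
+#include <armadillo>
+#include <limits>
+
+// One Lloyd iteration: 'data' is column-major (one point per column) and
+// 'centroids' holds one centroid per column.
+void LloydStep(const arma::mat& data, arma::mat& centroids)
+{
+  arma::mat newCentroids(centroids.n_rows, centroids.n_cols,
+      arma::fill::zeros);
+  arma::Col<size_t> counts(centroids.n_cols, arma::fill::zeros);
+
+  for (size_t i = 0; i < data.n_cols; ++i)
+  {
+    // Assignment step: find the closest centroid to point i.
+    size_t closest = 0;
+    double closestDist = std::numeric_limits<double>::max();
+    for (size_t j = 0; j < centroids.n_cols; ++j)
+    {
+      const double dist = arma::norm(data.col(i) - centroids.col(j));
+      if (dist < closestDist) { closest = j; closestDist = dist; }
+    }
+
+    // Accumulate sums for the update step.
+    newCentroids.col(closest) += data.col(i);
+    counts[closest]++;
+  }
+
+  // Update step: recalculate the centroid of each non-empty cluster.
+  for (size_t j = 0; j < centroids.n_cols; ++j)
+    if (counts[j] > 0)
+      centroids.col(j) = newCentroids.col(j) / double(counts[j]);
+}
+```
+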
+There is vast literature on the k-means algorithm and its uses, as well as
+strategies for choosing initial points effectively and keeping the algorithm
+from converging in local minima.  mlpack does implement some of these, notably
+the Bradley-Fayyad algorithm (see the reference below) for choosing refined
+initial points.  Importantly, the C++ `KMeans` class makes it very easy to
+improve the k-means algorithm in a modular way.
+
+```
+@inproceedings{bradley1998refining,
+  title={Refining initial points for k-means clustering},
+  author={Bradley, Paul S. and Fayyad, Usama M.},
+  booktitle={Proceedings of the Fifteenth International Conference on Machine
+      Learning (ICML 1998)},
+  volume={66},
+  year={1998}
+}
+```
+
+mlpack provides:
+
+ - a simple command-line executable to run k-means
+ - a simple C++ interface to run k-means
+ - a generic, extensible, and powerful C++ class for complex usage
+
+## Command-line `mlpack_kmeans`
+
+mlpack provides a command-line executable, `mlpack_kmeans`, to allow easy
+execution of the k-means algorithm on data.  Complete documentation of the
+executable can be found by typing
+
+```sh
+$ mlpack_kmeans --help
+```
+
+Note that mlpack also has bindings to other languages and provides, e.g., the
+`kmeans()` function in Python that is very similar to the `mlpack_kmeans`
+command-line program.  So each example below can be easily adapted to another
+language.
+
+Below are several examples demonstrating simple use of the `mlpack_kmeans`
+executable.
+
+### Simple k-means clustering
+
+We want to find 5 clusters using the points in the file `dataset.csv`.  By
+default, if any of the clusters end up empty, that cluster will be reinitialized
+to contain the point furthest from the cluster with maximum variance.  The
+cluster assignments of each point will be stored in `assignments.csv`.  Each row
+in `assignments.csv` will correspond to the same row in `dataset.csv`.
+
+```sh
+$ mlpack_kmeans -c 5 -i dataset.csv -v -o assignments.csv
+```
+
+### Saving the resulting centroids
+
+Sometimes it is useful to save the centroids of the clusters found by k-means;
+one example might be for plotting the points.  The `-C` (`--centroid_file`)
+option allows specification of a file into which the centroids will be saved
+(one centroid per line, if it is a CSV or other text format).
+
+```sh
+$ mlpack_kmeans -c 5 -i dataset.csv -v -o assignments.csv -C centroids.csv
+```
+
+### Allowing empty clusters
+
+If you would like to allow empty clusters to exist, instead of reinitializing
+them, simply specify the `-e` (`--allow_empty_clusters`) option.  Note that when
+you save your clusters, even empty clusters will still have centroids.  The
+centroids of the empty cluster will be the same as what they were on the last
+iteration when the cluster was not empty.
+
+```sh
+$ mlpack_kmeans -c 5 -i dataset.csv -v -e -o assignments.csv -C centroids.csv
+```
+
+### Killing empty clusters
+
+If you would like to kill empty clusters, instead of reinitializing them, simply
+specify the `-E` (`--kill_empty_clusters`) option.  Note that when you save your
+clusters, all the empty clusters will be removed and the final result may
+contain fewer than the specified number of clusters.
+
+```sh
+$ mlpack_kmeans -c 5 -i dataset.csv -v -E -o assignments.csv -C centroids.csv
+```
+
+### Limiting the maximum number of iterations
+
+As mentioned earlier, the k-means algorithm can often fail to converge.  In such
+a situation, it may be useful to stop the algorithm by way of limiting the
+maximum number of iterations.  This can be done with the `-m`
+(`--max_iterations`) parameter, which is set to 1000 by default.  If the maximum
+number of iterations is 0, the algorithm will run until convergence---or
+potentially forever.  The example below sets a maximum of 250 iterations.
+
+```sh
+$ mlpack_kmeans -c 5 -i dataset.csv -v -o assignments.csv -m 250
+```
+
+### Using Bradley-Fayyad 'refined start'
+
+The method proposed by Bradley and Fayyad in their paper "Refining initial
+points for k-means clustering" is implemented in mlpack.  This strategy samples
+points from the dataset and runs k-means clustering on those points multiple
+times, saving the resulting clusters.  Then, k-means clustering is run on those
+clusters, yielding the original number of clusters.  The centroids of those
+resulting clusters are used as initial centroids for k-means clustering on the
+entire dataset.
+
+This technique generally gives better initial points than the default random
+partitioning, but depending on the parameters, it can take much longer.  This
+initialization technique is enabled with the `-r` (`--refined_start`) option.
+The `-S` (`--samplings`) parameter controls how many samplings of the dataset
+are performed, and the `-p` (`--percentage`) parameter controls how much of the
+dataset is randomly sampled for each sampling (it must be between 0.0 and 1.0).
+For more information on the refined start technique, see the paper referenced in
+the introduction of this tutorial.
+
+The example below performs k-means clustering, giving 5 clusters, using the
+refined start technique, sampling 20% of the dataset 25 times to produce the
+initial centroids.
+
+```sh
+$ mlpack_kmeans -c 5 -i dataset.csv -v -o assignments.csv -r -S 25 -p 0.2
+```
+
+### Using different k-means algorithms
+
+The `mlpack_kmeans` program implements six different strategies for clustering;
+each of these gives the exact same results, but will have different runtimes.
+The particular algorithm to use can be specified with the `-a` or `--algorithm`
+option.  The choices are:
+
+ - `naive`: the standard Lloyd iteration; takes `O(kN)` time per iteration.
+ - `pelleg-moore`: the 'blacklist' algorithm, which builds a kd-tree on the
+   data.  This can be fast when k is small and the dimensionality is reasonably
+   low.
+ - `elkan`: Elkan's algorithm for k-means, which maintains upper and lower
+   distance bounds between each point and each centroid.  This can be very fast,
+   but it does not scale well to the case of large N or k, and uses a lot of
+   memory.
+ - `hamerly`: Hamerly's algorithm is a variant of Elkan's algorithm that
+   handles memory usage much better and thus can operate with much larger
+   datasets than Elkan's algorithm.
+ - `dualtree`: The dual-tree algorithm for k-means builds a kd-tree on both the
+   centroids and the points in order to prune away as much work as possible.
+   This algorithm is most effective when both N and k are large.
+ - `dualtree-covertree`: This is the dual-tree algorithm using cover trees
+   instead of kd-trees.  It satisfies the runtime guarantees specified in the
+   dual-tree k-means paper.
+
+In general, the `naive` algorithm will be much slower than the others on
+datasets that are larger than tiny.
+
+The example below uses the `dualtree` algorithm to perform k-means clustering
+with 5 clusters on the dataset in `dataset.csv`, using the initial centroids in
+`initial_centroids.csv`, saving the resulting cluster assignments to
+`assignments.csv`:
+
+```sh
+$ mlpack_kmeans -i dataset.csv -c 5 -v -I initial_centroids.csv -a dualtree \
+> -o assignments.csv
+```
+
+## The `KMeans` class
+
+The `KMeans<>` class (with default template parameters) provides a simple way
+to run k-means clustering using mlpack in C++.  The default template
+parameters for `KMeans<>` will choose initial centroids by randomly sampling
+points from the dataset, and will disallow empty clusters.  When an empty
+cluster is encountered, the point
+furthest from the cluster with maximum variance is set to the centroid of the
+empty cluster.
+
+### Running k-means and getting cluster assignments
+
+The simplest way to use the `KMeans<>` class is to pass in a dataset and a
+number of clusters, and receive the cluster assignments in return.  Note that
+the dataset must be column-major---that is, one column corresponds to one point.
+See [the matrices guide](../user/matrices.md) for more information.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The dataset we are clustering.
+extern arma::mat data;
+// The number of clusters we are getting.
+extern size_t clusters;
+
+// The assignments will be stored in this vector.
+arma::Row<size_t> assignments;
+
+// Initialize with the default arguments.
+KMeans<> k;
+k.Cluster(data, clusters, assignments);
+```
+
+Now, the vector `assignments` holds the cluster assignments of each point in the
+dataset.
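+
+For instance, the number of points in each cluster can be counted directly
+from `assignments` (a short follow-on sketch):
+
+```c++
+arma::Col<size_t> counts(clusters, arma::fill::zeros);
+for (size_t i = 0; i < assignments.n_elem; ++i)
+  counts[assignments[i]]++;
+```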
+
+### Running k-means and getting centroids of clusters
+
+Often it is useful to not only have the cluster assignments, but the centroids
+of each cluster.  Another overload of `Cluster()` makes this easily possible:
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The dataset we are clustering.
+extern arma::mat data;
+// The number of clusters we are getting.
+extern size_t clusters;
+
+// The assignments will be stored in this vector.
+arma::Row<size_t> assignments;
+// The centroids will be stored in this matrix.
+arma::mat centroids;
+
+// Initialize with the default arguments.
+KMeans<> k;
+k.Cluster(data, clusters, assignments, centroids);
+```
+
+Note that the centroids matrix has columns equal to the number of clusters and
+rows equal to the dimensionality of the dataset.  Each column represents the
+centroid of the corresponding cluster---for example, `centroids.col(0)` is the
+centroid of the first cluster.
+
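+A short sketch inspecting each centroid by column, using the variables from
+above:
+
+```c++
+// Print each cluster's centroid (transposed to a row for readability).
+for (size_t j = 0; j < centroids.n_cols; ++j)
+  centroids.col(j).t().print("Centroid " + std::to_string(j) + ":");
+```
+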
+### Limiting the maximum number of iterations
+
+The first argument to the constructor allows specification of the maximum number
+of iterations.  This is useful because often, the k-means algorithm does not
+converge, and is terminated after a number of iterations.  Setting this
+parameter to 0 indicates that the algorithm will run until convergence---note
+that in some cases, convergence may never happen.  The default maximum number of
+iterations is 1000.
+
+```c++
+// The first argument is the maximum number of iterations.  Here we set it to
+// 500 iterations.
+KMeans<> k(500);
+```
+
+Then you can run `Cluster()` as normal.
+
+### Setting initial cluster assignments
+
+If you have an initial guess for the cluster assignments for each point, you can
+fill the assignments vector with the guess and then pass an extra boolean
+(`initialAssignmentGuess`) as `true` to the `Cluster()` method.  Below are
+examples for either overload of `Cluster()`.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The dataset we are clustering on.
+extern arma::mat dataset;
+// The number of clusters we are obtaining.
+extern size_t clusters;
+
+// A vector pre-filled with initial assignment guesses.
+extern arma::Row<size_t> assignments;
+
+KMeans<> k;
+
+// The boolean set to true indicates that our assignments vector is filled with
+// initial guesses.
+k.Cluster(dataset, clusters, assignments, true);
+```
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The dataset we are clustering on.
+extern arma::mat dataset;
+// The number of clusters we are obtaining.
+extern size_t clusters;
+
+// A vector pre-filled with initial assignment guesses.
+extern arma::Row<size_t> assignments;
+
+// This will hold the centroids of the finished clusters.
+arma::mat centroids;
+
+KMeans<> k;
+
+// The boolean set to true indicates that our assignments vector is filled with
+// initial guesses.
+k.Cluster(dataset, clusters, assignments, centroids, true);
+```
+
+***Note***: If you have a heuristic or algorithm which makes initial guesses, a
+more elegant solution is to create a new class fulfilling the
+`InitialPartitionPolicy` template policy.  See the section about changing the
+initial partitioning strategy for more details.
+
+***Note***: If you set the `InitialPartitionPolicy` parameter to something other
+than the default but give an initial cluster assignment guess, the
+`InitialPartitionPolicy` will not be used to initialize the algorithm.  See the
+section about changing the initial partitioning strategy for more details.
+
+### Setting initial cluster centroids
+
+Equally important to being able to make initial cluster assignment guesses is
+being able to make initial cluster centroid guesses, without having to assign
+each point in the dataset to an initial cluster.  This is similar to the
+previous section, but now you must pass two extra booleans---the first
+(`initialAssignmentGuess`) as `false`, indicating that there are not initial
+cluster assignment guesses, and the second (`initialCentroidGuess`) as `true`,
+indicating that the centroids matrix is filled with initial centroid guesses.
+
+This, of course, only works with the overload of `Cluster()` that takes a matrix
+to put the resulting centroids in.  Below is an example.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// The dataset we are clustering on.
+extern arma::mat dataset;
+// The number of clusters we are obtaining.
+extern size_t clusters;
+
+// A matrix pre-filled with guesses for the initial cluster centroids.
+extern arma::mat centroids;
+
+// This will be filled with the final cluster assignments for each point.
+arma::Row<size_t> assignments;
+
+KMeans<> k;
+
+// Remember, the first boolean indicates that we are not giving initial
+// assignment guesses, and the second boolean indicates that we are giving
+// initial centroid guesses.
+k.Cluster(dataset, clusters, assignments, centroids, false, true);
+```
+
+***Note***: If you have a heuristic or algorithm which makes initial guesses, a
+more elegant solution is to create a new class fulfilling the
+`InitialPartitionPolicy` template policy.  See the section about changing the
+initial partitioning strategy for more details.
+
+***Note***: If you set the `InitialPartitionPolicy` parameter to something other
+than the default but give an initial cluster centroid guess, the
+`InitialPartitionPolicy` will not be used to initialize the algorithm.  See the
+section about changing the initial partitioning strategy for more details.
+
+### Running sparse k-means
+
+The `Cluster()` function can work on both sparse and dense matrices, so all of
+the above examples can be used with sparse matrices instead, if the fifth
+template parameter is modified.  Below is a simple example.  Note that the
+centroids are returned as a dense matrix, because the centroids of collections
+of sparse points are not generally sparse.
+
+```c++
+// The sparse dataset.
+extern arma::sp_mat sparseDataset;
+// The number of clusters.
+extern size_t clusters;
+
+// The assignments will be stored in this vector.
+arma::Row<size_t> assignments;
+// The centroids of each cluster will be stored in this dense matrix (see the
+// note above).
+arma::mat centroids;
+
+// We must change the fifth (and last) template parameter.
+KMeans<EuclideanDistance, SampleInitialization, MaxVarianceNewCluster,
+       NaiveKMeans, arma::sp_mat> k;
+k.Cluster(sparseDataset, clusters, assignments, centroids);
+```
+
+### Template parameters for the `KMeans` class
+
+The `KMeans<>` class also takes five template parameters, which can be
+modified to change the behavior of the k-means algorithm:
+
+ - `MetricType`: controls the distance metric used for clustering (by default,
+   the squared Euclidean distance is used)
+ - `InitialPartitionPolicy`: the method by which initial clusters are set; by
+   default, `SampleInitialization` is used
+ - `EmptyClusterPolicy`: the action taken when an empty cluster is encountered;
+   by default, `MaxVarianceNewCluster` is used
+ - `LloydStepType`: this defines the strategy used to make a single Lloyd
+   iteration; by default this is the typical Lloyd iteration specified in
+   `NaiveKMeans`
+ - `MatType`: type of data matrix to use for clustering
+
+The class is defined as follows:
+
+```c++
+template<
+  typename MetricType = SquaredEuclideanDistance,
+  typename InitialPartitionPolicy = SampleInitialization,
+  typename EmptyClusterPolicy = MaxVarianceNewCluster,
+  template<class, class> class LloydStepType = NaiveKMeans,
+  typename MatType = arma::mat
+>
+class KMeans;
+```
+
+In the following sections, each policy is described further, with examples of
+how to modify it.
+
+### Changing the distance metric used for k-means
+
+Most machine learning algorithms in mlpack support modifying the distance
+metric, and `KMeans<>` is no exception.  Similar to `NeighborSearch` (see the
+section in the [NeighborSearch tutorial](neighbor_search.md)), any of mlpack's
+metric classes (found in `mlpack/core/metrics/`) can be given as an argument.
+The `LMetric` class is a good example implementation.
+
+A class fulfilling the [MetricType policy](../developer/metrictype.md) must
+provide the following two functions:
+
+```c++
+// Empty constructor is required.
+MetricType();
+
+// Compute the distance between two points.
+template<typename VecType>
+double Evaluate(const VecType& a, const VecType& b);
+```
+
+Most of the standard metrics that could be used are stateless and therefore the
+`Evaluate()` method is implemented statically.  However, there are metrics, such
+as the Mahalanobis distance (`MahalanobisDistance`), that store state.  To this
+end, an instantiated `MetricType` object is stored within the `KMeans` class.
+The example below shows how to pass an instantiated `MahalanobisDistance` in the
+constructor.
+
+```c++
+// The initialized Mahalanobis distance.
+extern MahalanobisDistance distance;
+
+// We keep the default arguments for the maximum number of iterations, but pass
+// our instantiated metric.
+KMeans<MahalanobisDistance> k(1000, distance);
+```
+
+***Note***: While the `MetricType` policy only requires two methods, one of
+which is an empty constructor, more can always be added.  `MahalanobisDistance`
+also has constructors with parameters, because it is a stateful metric.
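+
+As an illustration, a minimal sketch of a stateless class satisfying the
+`MetricType` policy is below.  (The name `ManhattanExample` is purely
+illustrative and not part of mlpack; mlpack already provides
+`ManhattanDistance` if you actually need the L1 distance.)
+
+```c++
+class ManhattanExample
+{
+ public:
+  // The policy requires an empty constructor.
+  ManhattanExample() { }
+
+  // Compute the L1 (Manhattan) distance between two points.
+  template<typename VecType>
+  double Evaluate(const VecType& a, const VecType& b)
+  {
+    return arma::accu(arma::abs(a - b));
+  }
+};
+
+// Use the custom metric as the first template parameter.
+KMeans<ManhattanExample> k;
+```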
+
+### Changing the initial partitioning strategy used for k-means
+
+There have been many initial cluster strategies for k-means proposed in the
+literature.  Fortunately, the `KMeans<>` class makes it very easy to implement
+one of these methods and plug it in without needing to modify the existing
+algorithm code at all.
+
+By default, the `KMeans<>` class uses `SampleInitialization`, which randomly
+samples points as initial centroids.  However, writing a new policy is simple;
+it only needs to implement the following functions:
+
+```c++
+// Empty constructor is required.
+InitialPartitionPolicy();
+
+// Only *one* of the following two functions is required!  Implement whichever
+// you find more convenient.
+
+// This function is called to initialize the clusters and returns centroids.
+template<typename MatType>
+void Cluster(MatType& data,
+             const size_t clusters,
+             arma::mat& centroids);
+
+// This function is called to initialize the clusters and fills the given
+// vector with individual point assignments.  The centroids will then be
+// calculated from those assignments.
+template<typename MatType>
+void Cluster(MatType& data,
+             const size_t clusters,
+             arma::Row<size_t>& assignments);
+```
+
+The templatization of the `Cluster()` function allows both dense and sparse
+matrices to be passed in.  If the desired policy does not work with sparse (or
+dense) matrices, then the method can be written specifically for one type of
+matrix---however, be warned that if you try to use `KMeans` with that policy and
+the wrong type of matrix, you will get many ugly compilation errors!
+
+```c++
+// The Cluster() function specialized for dense matrices.
+void Cluster(arma::mat& data,
+             const size_t clusters,
+             arma::Row<size_t>& assignments);
+```
+
+Note that only one of the two possible `Cluster()` functions is required.  This
+is because sometimes it is easier to express an initial partitioning policy as
+something that returns point assignments, and sometimes it is easier to express
+the policy as something that returns centroids.  The `KMeans<>` class will use
+whichever of these two functions is given; if both are given, the overload that
+returns centroids will be preferred.
+
+One alternative to the default `SampleInitialization` policy is the
+`RefinedStart` policy, an implementation of the Bradley and Fayyad approach for
+finding initial points detailed in "Refined initial points for k-means
+clustering" and mentioned elsewhere in this document.  Another option is the
+`RandomPartition` class, which randomly assigns points to clusters, but this
+may not work very well for most settings.  See the documentation for
+`RefinedStart` and `RandomPartition` for more information.
+
+If the `Cluster()` method returns point assignments instead of centroids, then
+valid initial assignments must be returned for every point in the dataset.
+
+As with the `MetricType` template parameter, an initialized
+`InitialPartitionPolicy` can be passed to the constructor of `KMeans` as a
+third argument.
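+
+For example, a hypothetical policy that simply takes the first `clusters`
+points of the dataset as the initial centroids might be sketched as below.
+(`FirstPointsInitialization` is an illustrative name, not an mlpack class, and
+for simplicity this sketch supports only dense matrices, as discussed above.)
+
+```c++
+class FirstPointsInitialization
+{
+ public:
+  // The policy requires an empty constructor.
+  FirstPointsInitialization() { }
+
+  // Use the first 'clusters' points of the dataset as the initial centroids.
+  void Cluster(arma::mat& data,
+               const size_t clusters,
+               arma::mat& centroids)
+  {
+    centroids = data.cols(0, clusters - 1);
+  }
+};
+
+// Plug the policy in as the second template parameter.
+KMeans<EuclideanDistance, FirstPointsInitialization> k;
+```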
+
+### Changing the action taken when an empty cluster is encountered
+
+Sometimes, during clustering, a situation will arise where a cluster has no
+points in it.  The `KMeans` class allows easy customization of the action to be
+taken when this occurs.  By default, the point furthest from the centroid of the
+cluster with maximum variance is taken as the centroid of the empty cluster;
+this is implemented in the `MaxVarianceNewCluster` class.  An alternative is
+the `AllowEmptyClusters` class, which simply allows empty clusters to
+persist.
+
+A custom policy can also be written; it must implement the following methods:
+
+```c++
+// Empty constructor is required.
+EmptyClusterPolicy();
+
+// This function is called when an empty cluster is encountered.  emptyCluster
+// indicates the cluster which is empty, and then the clusterCounts and
+// assignments are meant to be modified by the function.  The function should
+// return the number of modified points.
+template<typename MatType>
+size_t EmptyCluster(const MatType& data,
+                    const size_t emptyCluster,
+                    const MatType& centroids,
+                    arma::Col<size_t>& clusterCounts,
+                    arma::Row<size_t>& assignments);
+```
+
+The `EmptyCluster()` function is called for each cluster that is empty at each
+iteration of the algorithm.  As with `InitialPartitionPolicy`, the
+`EmptyCluster()` function does not need to be generalized to support both dense
+and sparse matrices---but usage with the wrong type of matrix will cause
+compilation errors.
+
+Like the other template parameters to `KMeans`, `EmptyClusterPolicy`
+implementations that have state can be passed to the constructor of `KMeans` as
+a fourth argument.  See the `KMeans` documentation for further details.
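+
+As a minimal illustration, a hypothetical policy that simply leaves empty
+clusters untouched (similar in spirit to the existing `AllowEmptyClusters`
+class) could be sketched as follows; the name `IgnoreEmptyClusters` is ours,
+not mlpack's:
+
+```c++
+class IgnoreEmptyClusters
+{
+ public:
+  // The policy requires an empty constructor.
+  IgnoreEmptyClusters() { }
+
+  // Take no action when a cluster is empty, and report that no points were
+  // modified.
+  template<typename MatType>
+  size_t EmptyCluster(const MatType& /* data */,
+                      const size_t /* emptyCluster */,
+                      const MatType& /* centroids */,
+                      arma::Col<size_t>& /* clusterCounts */,
+                      arma::Row<size_t>& /* assignments */)
+  {
+    return 0;
+  }
+};
+```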
+
+### The `LloydStepType` template parameter
+
+The internal algorithm used for a single step of the k-means algorithm can
+easily be changed; mlpack provides several classes that satisfy the
+`LloydStepType` policy:
+
+ - `NaiveKMeans`
+ - `ElkanKMeans`
+ - `HamerlyKMeans`
+ - `PellegMooreKMeans`
+ - `DualTreeKMeans`
+
+Note that the `LloydStepType` policy is itself a template template parameter,
+and must accept two template parameters of its own:
+
+ - `MetricType`: the type of metric to use
+ - `MatType`: the type of data matrix to use
+
+The `LloydStepType` policy also mandates three functions:
+
+ - a constructor: `LloydStepType(const MatType& dataset, MetricType& metric);`
+ - an `Iterate()` function:
+
+```c++
+/**
+ * Run a single iteration of the Lloyd algorithm, updating the given centroids
+ * into the newCentroids matrix.  If any cluster is empty (that is, if any
+ * cluster has no points assigned to it), then the centroid associated with
+ * that cluster may be filled with invalid data (it will be corrected later).
+ *
+ * @param centroids Current cluster centroids.
+ * @param newCentroids New cluster centroids.
+ * @param counts Number of points in each cluster at the end of the iteration.
+ */
+double Iterate(const arma::mat& centroids,
+               arma::mat& newCentroids,
+               arma::Col<size_t>& counts);
+```
+
+ - a function to get the number of distance calculations:
+
+```c++
+size_t DistanceCalculations() const { return distanceCalculations; }
+```
+
+Note that `Iterate()` does not need to produce valid centroids for empty
+clusters, because `EmptyClusterPolicy` will correct them afterwards.  This
+behavior can be used to avoid small amounts of computation.
+
+For examples, see the five aforementioned implementations of classes that
+satisfy the `LloydStepType` policy.
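+
+For instance, switching a `KMeans` object to use Elkan's algorithm for each
+Lloyd iteration only requires changing the fourth template parameter:
+
+```c++
+// Use Elkan's algorithm for single Lloyd iterations.
+KMeans<EuclideanDistance, SampleInitialization, MaxVarianceNewCluster,
+       ElkanKMeans> k;
+```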
+
+## Further documentation
+
+For further documentation on the `KMeans` class, consult the comments in the
+source code, found in `mlpack/methods/kmeans/`.
diff -pruN 3.4.2-7/doc/tutorials/linear_regression/linear_regression.txt 4.0.1-1/doc/tutorials/linear_regression/linear_regression.txt
--- 3.4.2-7/doc/tutorials/linear_regression/linear_regression.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/linear_regression/linear_regression.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,418 +0,0 @@
-/*!
-
-@file linear_regression.txt
-@author James Cline
-@brief Tutorial for how to use the LinearRegression class.
-
-@page lrtutorial Linear/ridge regression tutorial (mlpack_linear_regression)
-
-@section intro_lrtut Introduction
-
-Linear regression and ridge regression are simple machine learning techniques
-that aim to estimate the parameters of a linear model.  Assuming we have \f$n\f$
-\b predictor points \f$\mathbf{x_i}, 0 \le i < n\f$ of dimensionality \f$d\f$
-and \f$n\f$ responses \f$y_i, 0 \le i < n\f$, we are trying to estimate the best
-fit for \f$\beta_i, 0 \le i \le d\f$ in the linear model
-
-\f[
-y_i = \beta_0 + \displaystyle\sum_{j = 1}^{d} \beta_j x_{ij}
-\f]
-
-for each predictor \f$\mathbf{x_i}\f$ and response \f$y_i\f$.  If we take each
-predictor \f$\mathbf{x_i}\f$ as a row in the matrix \f$\mathbf{X}\f$ and each
-response \f$y_i\f$ as an entry of the vector \f$\mathbf{y}\f$, we can represent
-the model in vector form:
-
-\f[
-\mathbf{y} = \mathbf{X} \mathbf{\beta} + \beta_0
-\f]
-
-The result of this method is the vector \f$\mathbf{\beta}\f$, including the
-offset term (or intercept term) \f$\beta_0\f$.
-
-\b mlpack provides:
-
- - a \ref cli_lrtut "simple command-line executable" to perform linear regression or ridge regression
- - a \ref linreg_lrtut "simple C++ interface" to perform linear regression or    ridge regression
-
-@section toc_lrtut Table of Contents
-
-A list of all the sections this tutorial contains.
-
- - \ref intro_lrtut
- - \ref toc_lrtut
- - \ref cli_lrtut
-   - \ref cli_ex1_lrtut
-   - \ref cli_ex2_lrtut
-   - \ref cli_ex3_lrtut
-   - \ref cli_ex4_lrtut
- - \ref linreg_lrtut
-   - \ref linreg_ex1_lrtut
-   - \ref linreg_ex2_lrtut
-   - \ref linreg_ex3_lrtut
-   - \ref linreg_ex4_lrtut
-   - \ref linreg_ex5_lrtut
- - \ref further_doc_lrtut
-
-@section cli_lrtut Command-Line 'mlpack_linear_regression'
-
-The simplest way to perform linear regression or ridge regression in \b mlpack
-is to use the \c mlpack_linear_regression executable.  This program will perform
-linear regression and place the resultant coefficients into one file.
-
-The output file holds a vector of coefficients in increasing order of dimension;
-that is, the offset term (\f$\beta_0\f$), the coefficient for dimension 1
-(\f$\beta_1\f$, then dimension 2 (\f$\beta_2\f$) and so forth, as well as the
-intercept.  This executable can also predict the \f$y\f$ values of a second
-dataset based on the computed coefficients.
-
-Below are several examples of simple usage (and the resultant output).  The
-\c option is used so that verbose output is given.  Further documentation on
-each individual option can be found by typing
-
-@code
-$ mlpack_linear_regression --help
-@endcode
-
-@subsection cli_ex1_lrtut One file, generating the function coefficients
-
-@code
-$ mlpack_linear_regression --training_file dataset.csv -v -M lr.xml
-[INFO ] Loading 'dataset.csv' as CSV data.  Size is 2 x 5.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   lambda: 0
-[INFO ]   output_model_file: lr.xml
-[INFO ]   output_predictions: predictions.csv
-[INFO ]   test_file: ""
-[INFO ]   training_file: dataset.csv
-[INFO ]   training_responses: ""
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   load_regressors: 0.000263s
-[INFO ]   loading_data: 0.000220s
-[INFO ]   regression: 0.000392s
-[INFO ]   total_time: 0.001920s
-@endcode
-
-Convenient program timers are given for different parts of the calculation at
-the bottom of the output, as well as the parameters the simulation was run with.
-Now, if we look at the output model file, which is \c lr.xml,
-
-@code
-$ cat dataset.csv
-0,0
-1,1
-2,2
-3,3
-4,4
-
-$ cat lr.xml
-<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
-<!DOCTYPE boost_serialization>
-<boost_serialization signature="serialization::archive" version="12">
-<linearRegressionModel class_id="0" tracking_level="0" version="0">
-  <parameters class_id="1" tracking_level="0" version="0">
-    <n_rows>2</n_rows>
-    <n_cols>1</n_cols>
-    <n_elem>2</n_elem>
-    <vec_state>1</vec_state>
-    <item>-3.97205464519563669e-16</item>
-    <item>1.00000000000000022e+00</item>
-  </parameters>
-  <lambda>0.00000000000000000e+00</lambda>
-  <intercept>1</intercept>
-</linearRegressionModel>
-</boost_serialization>
-@endcode
-
-As you can see, the function for this input is \f$f(y)=0+1x_1\f$.  We can see
-that the model we have trained catches this; in the \c \<parameters\> section of
-\c lr.xml, we can see that there are two elements, which are (approximately) 0
-and 1.  The first element corresponds to the intercept 0, and the second column
-corresponds to the coefficient 1 for the variable \f$x_1\f$.  Note that in this
-example, the regressors for the dataset are the second column.  That is, the
-dataset is one dimensional, and the last column has the \f$y\f$ values, or
-responses, for each row. You can specify these responses in a separate file if
-you want, using the \c --input_responses, or \c -r, option.
-
-@subsection cli_ex2_lrtut Compute model and predict at the same time
-
-@code
-$ mlpack_linear_regression --training_file dataset.csv --test_file predict.csv \
-> -v
-[INFO ] Loading 'dataset.csv' as CSV data.  Size is 2 x 5.
-[INFO ] Loading 'predict.csv' as raw ASCII formatted data.  Size is 1 x 3.
-[INFO ] Saving CSV data to 'predictions.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   lambda: 0
-[INFO ]   output_model_file: ""
-[INFO ]   output_predictions: predictions.csv
-[INFO ]   test_file: predict.csv
-[INFO ]   training_file: dataset.csv
-[INFO ]   training_responses: ""
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   load_regressors: 0.000371s
-[INFO ]   load_test_points: 0.000229s
-[INFO ]   loading_data: 0.000491s
-[INFO ]   prediction: 0.000075s
-[INFO ]   regression: 0.000449s
-[INFO ]   saving_data: 0.000186s
-[INFO ]   total_time: 0.002731s
-
-$ cat dataset.csv
-0,0
-1,1
-2,2
-3,3
-4,4
-
-$ cat predict.csv
-2
-3
-4
-
-$ cat predictions.csv
-2.0000000000e+00
-3.0000000000e+00
-4.0000000000e+00
-@endcode
-
-We used the same dataset, so we got the same parameters. The key thing to note
-about the \c predict.csv dataset is that it has the same dimensionality as the
-dataset used to create the model, one.  If the model generating dataset has
-\f$d\f$ dimensions, so must the dataset we want to predict for.
-
-@subsection cli_ex3_lrtut Prediction using a precomputed model
-
-@code
-$ mlpack_linear_regression --input_model_file lr.xml --test_file predict.csv -v
-[INFO ] Loading 'predict.csv' as raw ASCII formatted data.  Size is 1 x 3.
-[INFO ] Saving CSV data to 'predictions.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: lr.xml
-[INFO ]   lambda: 0
-[INFO ]   output_model_file: ""
-[INFO ]   output_predictions: predictions.csv
-[INFO ]   test_file: predict.csv
-[INFO ]   training_file: ""
-[INFO ]   training_responses: ""
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   load_model: 0.000264s
-[INFO ]   load_test_points: 0.000186s
-[INFO ]   loading_data: 0.000157s
-[INFO ]   prediction: 0.000098s
-[INFO ]   saving_data: 0.000157s
-[INFO ]   total_time: 0.001688s
-
-$ cat lr.xml
-<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
-<!DOCTYPE boost_serialization>
-<boost_serialization signature="serialization::archive" version="12">
-<linearRegressionModel class_id="0" tracking_level="0" version="0">
-  <parameters class_id="1" tracking_level="0" version="0">
-    <n_rows>2</n_rows>
-    <n_cols>1</n_cols>
-    <n_elem>2</n_elem>
-    <vec_state>1</vec_state>
-    <item>-3.97205464519563669e-16</item>
-    <item>1.00000000000000022e+00</item>
-  </parameters>
-  <lambda>0.00000000000000000e+00</lambda>
-  <intercept>1</intercept>
-</linearRegressionModel>
-</boost_serialization>
-
-$ cat predict.csv
-2
-3
-4
-
-$ cat predictions.csv
-2.0000000000e+00
-3.0000000000e+00
-4.0000000000e+00
-@endcode
-
-@subsection cli_ex4_lrtut Using ridge regression
-
-Sometimes, the input matrix of predictors has a covariance matrix that is not
-invertible, or the system is overdetermined.  In this case, ridge regression is
-useful: it adds a normalization term to the covariance matrix to make it
-invertible.  Ridge regression is a standard technique and documentation for the
-mathematics behind it can be found anywhere on the Internet.  In short, the
-covariance matrix
-
-\f[
-\mathbf{X}' \mathbf{X}
-\f]
-
-is replaced with
-
-\f[
-\mathbf{X}' \mathbf{X} + \lambda \mathbf{I}
-\f]
-
-where \f$\mathbf{I}\f$ is the identity matrix.  So, a \f$\lambda\f$ parameter
-greater than zero should be specified to perform ridge regression, using the
-\c --lambda (or \c -l) option.  An example is given below.
-
-@code
-$ mlpack_linear_regression --training_file dataset.csv -v --lambda 0.5 -M lr.xml
-[INFO ] Loading 'dataset.csv' as CSV data.  Size is 2 x 5.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   lambda: 0.5
-[INFO ]   output_model_file: lr.xml
-[INFO ]   output_predictions: predictions.csv
-[INFO ]   test_file: ""
-[INFO ]   training_file: dataset.csv
-[INFO ]   training_responses: ""
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   load_regressors: 0.000210s
-[INFO ]   loading_data: 0.000170s
-[INFO ]   regression: 0.000332s
-[INFO ]   total_time: 0.001835s
-@endcode
-
-Further documentation on options should be found by using the \c --help option.
-
-@section linreg_lrtut The 'LinearRegression' class
-
-The 'LinearRegression' class is a simple implementation of linear regression.
-
-Using the LinearRegression class is very simple. It has two available
-constructors; one for generating a model from a matrix of predictors and a
-vector of responses, and one for loading an already computed model from a given
-file.
-
-The class provides one method that performs computation:
-@code
-void Predict(const arma::mat& points, arma::vec& predictions);
-@endcode
-
-Once you have generated or loaded a model, you can call this method and pass it
-a matrix of data points to predict values for using the model. The second
-parameter, predictions, will be modified to contain the predicted values
-corresponding to each row of the points matrix.
-
-@subsection linreg_ex1_lrtut Generating a model
-
-@code
-#include <mlpack/methods/linear_regression/linear_regression.hpp>
-
-using namespace mlpack::regression;
-
-arma::mat data; // The dataset itself.
-arma::vec responses; // The responses, one row for each row in data.
-
-// Regress.
-LinearRegression lr(data, responses);
-
-// Get the parameters, or coefficients.
-arma::vec parameters = lr.Parameters();
-@endcode
-
-@subsection linreg_ex2_lrtut Setting a model
-
-Assuming you already have a model and do not need to create one, this is how
-you would set the parameters for a LinearRegression instance.
-
-@code
-arma::vec parameters; // Your model.
-
-LinearRegression lr; // Create a new LinearRegression instance or reuse one.
-lr.Parameters() = parameters; // Set the model.
-@endcode
-
-@subsection linreg_ex3_lrtut Load a model from a file
-
-If you have a generated model in a file somewhere you would like to load and
-use, you can use \c data::Load() to load it.
-
-@code
-std::string filename; // The path and name of your file.
-
-LinearRegression lr;
-data::Load(filename, "lr_model", lr);
-@endcode
-
-@subsection linreg_ex4_lrtut Prediction
-
-Once you have generated or loaded a model using one of the aforementioned
-methods, you can predict values for a dataset.
-
-@code
-LinearRegression lr();
-// Load or generate your model.
-
-// The dataset we want to predict on; each row is a data point.
-arma::mat points;
-// This will store the predictions; one row for each point.
-arma::vec predictions;
-
-lr.Predict(points, predictions); // Predict.
-
-// Now, the vector 'predictions' will contain the predicted values.
-@endcode
-
-@subsection linreg_ex5_lrtut Setting lambda for ridge regression
-
-As discussed in \ref cli_ex4_lrtut, ridge regression is useful when the
-covariance of the predictors is not invertible.  The standard constructor can be
-used to set a value of lambda:
-
-@code
-#include <mlpack/methods/linear_regression/linear_regression.hpp>
-
-using namespace mlpack::regression;
-
-arma::mat data; // The dataset itself.
-arma::vec responses; // The responses, one row for each row in data.
-
-// Regress, with a lambda of 0.5.
-LinearRegression lr(data, responses, 0.5);
-
-// Get the parameters, or coefficients.
-arma::vec parameters = lr.Parameters();
-@endcode
-
-In addition, the \c Lambda() function can be used to get or modify the lambda
-value:
-
-@code
-LinearRegression lr;
-lr.Lambda() = 0.5;
-Log::Info << "Lambda is " << lr.Lambda() << "." << std::endl;
-@endcode
-
-@section further_doc_lrtut Further documentation
-
-For further documentation on the LinearRegression class, consult the
-\ref mlpack::regression::LinearRegression "complete API documentation".
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/linear_regression.md 4.0.1-1/doc/tutorials/linear_regression.md
--- 3.4.2-7/doc/tutorials/linear_regression.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/linear_regression.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,444 @@
+# Linear/ridge regression tutorial
+
+Linear regression and ridge regression are simple machine learning techniques
+that aim to estimate the parameters of a linear model.  Assuming we have `n`
+*predictor* points `x_i`, of dimensionality `d`, and `n` responses `y_i`, we are
+trying to estimate the best fit for `b_i` with `0 <= i <= d` in the linear model
+
+```
+y_i = b_0 + sum_j (b_j x_ij)
+```
+
+for each predictor `x_i` and response `y_i`.  If we take each predictor `x_i` as
+a row in the matrix `X` and each response `y_i` as an entry of the vector `y`,
+we can represent the model in vector form:
+
+```
+y = Xb + b_0
+```
+
+The result of this method is the vector `b`, including the offset term (or
+intercept term) `b_0`.
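+
+For ordinary least squares (absorbing the intercept into `X` as a column of
+ones), `b` can be found by solving the normal equations
+
+```
+(X' X) b = X' y
+```
+
+a fact worth keeping in mind for the ridge regression section later, which
+modifies the `X' X` term.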
+
+## Command-line `mlpack_linear_regression`
+
+The simplest way to perform linear regression or ridge regression in mlpack
+is to use the `mlpack_linear_regression` program.  This program will perform
+linear regression and place the resultant coefficients into one file.  Note that
+this guide details the `mlpack_linear_regression` command-line program, but
+because mlpack also has bindings to other languages, functions like
+`linear_regression()` exist in Python and Julia, and each example below can be
+easily adapted to those languages.
+
+The output file holds a vector of coefficients in increasing order of
+dimension; that is, the offset (or intercept) term (`b_0`), the coefficient for
+dimension 1 (`b_1`), then dimension 2 (`b_2`), and so forth.  This executable
+can also predict the `y` values of a second dataset based on the computed
+coefficients.
+
+Below are several examples of simple usage (and the resultant output).  The `-v`
+option is used so that verbose output is given.  Further documentation on each
+individual option can be found by typing
+
+```sh
+$ mlpack_linear_regression --help
+```
+
+### One file, generating the function coefficients
+
+```sh
+$ mlpack_linear_regression --training_file dataset.csv -v -M lr.xml
+[INFO ] Loading 'dataset.csv' as CSV data.  Size is 2 x 5.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   lambda: 0
+[INFO ]   output_model_file: lr.xml
+[INFO ]   output_predictions: predictions.csv
+[INFO ]   test_file: ""
+[INFO ]   training_file: dataset.csv
+[INFO ]   training_responses: ""
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   load_regressors: 0.000263s
+[INFO ]   loading_data: 0.000220s
+[INFO ]   regression: 0.000392s
+[INFO ]   total_time: 0.001920s
+```
+
+Convenient program timers are given for different parts of the calculation at
+the bottom of the output, as well as the parameters the simulation was run with.
+Now, if we look at the output model file, which is `lr.xml`,
+
+```sh
+$ cat dataset.csv
+0,0
+1,1
+2,2
+3,3
+4,4
+
+$ cat lr.xml
+<?xml version="1.0" encoding="utf-8"?>
+<cereal>
+	<model>
+		<cereal_class_version>0</cereal_class_version>
+		<parameters>
+			<n_rows>2</n_rows>
+			<n_cols>1</n_cols>
+			<vec_state>1</vec_state>
+			<elem>0</elem>
+			<elem>1</elem>
+		</parameters>
+		<lambda>0</lambda>
+		<intercept>true</intercept>
+	</model>
+</cereal>
+```
+
+As you can see, the function for this input is `f(x) = 0 + 1 x_1`.  We can see
+that the model we have trained captures this; in the `<parameters>` section of
+`lr.xml`, we can see that there are two elements, which are (approximately) 0
+and 1.  The first element corresponds to the intercept 0, and the second
+element corresponds to the coefficient 1 for the variable `x_1`.  Note that in
+this example, the regressors for the dataset are the first column.  That is,
+the dataset is one dimensional, and the last column has the `y` values, or
+responses, for each row.  You can specify these responses in a separate file if
+you want, using the `--training_responses_file`, or `-r`, option.
+
+### Train a multivariate linear regression model
+
+Multivariate linear regression means that the response variable is predicted by
+more than just one input variable.  In this example we will fit a multivariate
+linear regression model to data with four predictor variables (the fifth and
+last column is the response), stored in `dataset_2.csv`.
+
+```sh
+$ cat dataset_2.csv
+0,0,0,0,14
+1,1,1,1,24
+2,1,0,2,27
+1,2,2,2,32
+-1,-3,0,2,17
+```
+
+Now let's run `mlpack_linear_regression` as usual:
+
+```sh
+$ mlpack_linear_regression --training_file dataset_2.csv -v -M lr.xml
+[INFO ] Loading 'dataset_2.csv' as CSV data.  Size is 5 x 5.
+[INFO ] 
+[INFO ] Execution parameters:
+[INFO ]   help: 0
+[INFO ]   info: 
+[INFO ]   input_model_file: 
+[INFO ]   lambda: 0
+[INFO ]   output_model_file: lr.xml
+[INFO ]   output_predictions_file: 
+[INFO ]   test_file: 
+[INFO ]   training_file: dataset_2.csv
+[INFO ]   training_responses_file: 
+[INFO ]   verbose: 1
+[INFO ]   version: 0
+[INFO ] Program timers:
+[INFO ]   load_regressors: 0.000060s
+[INFO ]   loading_data: 0.000050s
+[INFO ]   regression: 0.000049s
+[INFO ]   total_time: 0.000118s
+
+$ cat lr.xml
+<?xml version="1.0" encoding="utf-8"?>
+<cereal>
+	<model>
+		<cereal_class_version>0</cereal_class_version>
+		<parameters>
+			<n_rows>5</n_rows>
+			<n_cols>1</n_cols>
+			<vec_state>1</vec_state>
+			<elem>14.00000000000002</elem>
+			<elem>1.9999999999999447</elem>
+			<elem>1.0000000000000431</elem>
+			<elem>2.9999999999999516</elem>
+			<elem>4.0000000000000249</elem>
+		</parameters>
+		<lambda>0</lambda>
+		<intercept>true</intercept>
+	</model>
+</cereal>
+```
+
+If we take a look at the `lr.xml` output, we can see that the `<parameters>`
+section has five elements, of which the first corresponds to `b_0`, the second
+to `b_1`, and so on.  This is equivalent to
+`f(x) = b_0 + b_1 x_1 + b_2 x_2 + b_3 x_3 + b_4 x_4`, or
+`f(x) = 14 + 2 x_1 + 1 x_2 + 3 x_3 + 4 x_4`.  As a quick check, the second
+point `(1, 1, 1, 1)` gives `14 + 2 + 1 + 3 + 4 = 24`, matching its response in
+`dataset_2.csv`.
+
+### Compute model and predict at the same time
+
+```sh
+$ mlpack_linear_regression --training_file dataset.csv --test_file predict.csv --output_predictions_file predictions.csv \
+> -v
+[WARN ] '--output_predictions_file (-o)' ignored because '--test_file (-T)' is specified!
+[INFO ] Loading 'dataset.csv' as CSV data.  Size is 2 x 5.
+[INFO ] Loading 'predict.csv' as raw ASCII formatted data.  Size is 1 x 3.
+[INFO ] Saving CSV data to 'predictions.csv'.
+[INFO ] 
+[INFO ] Execution parameters:
+[INFO ]   help: 0
+[INFO ]   info: 
+[INFO ]   input_model_file: 
+[INFO ]   lambda: 0
+[INFO ]   output_model_file: 
+[INFO ]   output_predictions_file: 'predictions.csv' (1x3 matrix)
+[INFO ]   test_file: 'predict.csv' (0x0 matrix)
+[INFO ]   training_file: 'dataset.csv' (0x0 matrix)
+[INFO ]   training_responses_file: ''
+[INFO ]   verbose: 1
+[INFO ]   version: 0
+[INFO ] Program timers:
+[INFO ]   load_regressors: 0.000069s
+[INFO ]   load_test_points: 0.000031s
+[INFO ]   loading_data: 0.000079s
+[INFO ]   prediction: 0.000001s
+[INFO ]   regression: 0.000054s
+[INFO ]   saving_data: 0.000055s
+[INFO ]   total_time: 0.000203s
+
+
+$ cat dataset.csv
+0,0
+1,1
+2,2
+3,3
+4,4
+
+$ cat predict.csv
+2
+3
+4
+
+$ cat predictions.csv
+2.0000000000e+00
+3.0000000000e+00
+4.0000000000e+00
+```
+
+We used the same dataset, so we got the same parameters. The key thing to note
+about the `predict.csv` dataset is that it has the same dimensionality as the
+dataset used to create the model, one.  If the model generating dataset has `d`
+dimensions, so must the dataset we want to predict for.
+
+### Prediction using a precomputed model
+
+```sh
+$ mlpack_linear_regression --input_model_file lr.xml --test_file predict.csv --output_predictions_file predictions.csv -v
+[WARN ] '--output_predictions_file (-o)' ignored because '--test_file (-T)' is specified!
+[INFO ] Loading 'predict.csv' as raw ASCII formatted data.  Size is 1 x 3.
+[INFO ] Saving CSV data to 'predictions.csv'.
+[INFO ] 
+[INFO ] Execution parameters:
+[INFO ]   help: 0
+[INFO ]   info: 
+[INFO ]   input_model_file: lr.xml
+[INFO ]   lambda: 0
+[INFO ]   output_model_file: 
+[INFO ]   output_predictions_file: 'predictions.csv' (1x3 matrix)
+[INFO ]   test_file: 'predict.csv' (0x0 matrix)
+[INFO ]   training_file: ''
+[INFO ]   training_responses_file: ''
+[INFO ]   verbose: 1
+[INFO ]   version: 0
+[INFO ] Program timers:
+[INFO ]   load_model: 0.000051s
+[INFO ]   load_test_points: 0.000052s
+[INFO ]   loading_data: 0.000044s
+[INFO ]   prediction: 0.000010s
+[INFO ]   saving_data: 0.000079s
+[INFO ]   total_time: 0.000160s
+
+
+$ cat lr.xml
+<?xml version="1.0" encoding="utf-8"?>
+<cereal>
+	<model>
+		<cereal_class_version>0</cereal_class_version>
+		<parameters>
+			<n_rows>2</n_rows>
+			<n_cols>1</n_cols>
+			<vec_state>1</vec_state>
+			<elem>0</elem>
+			<elem>1</elem>
+		</parameters>
+		<lambda>0</lambda>
+		<intercept>true</intercept>
+	</model>
+</cereal>
+
+
+$ cat predict.csv
+2
+3
+4
+
+$ cat predictions.csv
+2.0000000000e+00
+3.0000000000e+00
+4.0000000000e+00
+```
+
+### Using ridge regression
+
+Sometimes, the input matrix of predictors has a covariance matrix that is not
+invertible, or the system is overdetermined.  In this case, ridge regression is
+useful: it adds a normalization term to the covariance matrix to make it
+invertible.  Ridge regression is a standard technique and documentation for the
+mathematics behind it can be found anywhere on the Internet.  In short, the
+covariance matrix `X' X` is replaced with `X' X + l I` where `I` is the identity
+matrix.  So, an `l` parameter greater than zero should be specified to perform
+ridge regression, using the `--lambda` (or `-l`) option.  An example is given
+below.
+
+```sh
+$ mlpack_linear_regression --training_file dataset.csv -v --lambda 0.5 -M lr.xml
+[INFO ] Loading 'dataset.csv' as CSV data.  Size is 2 x 5.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   lambda: 0.5
+[INFO ]   output_model_file: lr.xml
+[INFO ]   output_predictions: predictions.csv
+[INFO ]   test_file: ""
+[INFO ]   training_file: dataset.csv
+[INFO ]   training_responses: ""
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   load_regressors: 0.000210s
+[INFO ]   loading_data: 0.000170s
+[INFO ]   regression: 0.000332s
+[INFO ]   total_time: 0.001835s
+```
+
+Further documentation on options can be found by using the `--help` option.
+
+## The `LinearRegression` class
+
+The `LinearRegression` class is a simple implementation of linear regression.
+
+Using the `LinearRegression` class is very simple.  It has two available
+constructors: one for generating a model from a matrix of predictors and a
+vector of responses, and one for loading an already computed model from a given
+file.
+
+The class provides one method that performs computation:
+
+```c++
+void Predict(const arma::mat& points, arma::vec& predictions);
+```
+
+Once you have generated or loaded a model, you can call this method and pass it
+a matrix of data points to predict values for using the model.  The second
+parameter, `predictions`, will be modified to contain the predicted values
+corresponding to each column (that is, each point) of the `points` matrix.
+
+### Generating a model
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+arma::mat data; // The dataset itself.
+arma::vec responses; // The responses, one for each point (column) in data.
+
+// Regress.
+LinearRegression lr(data, responses);
+
+// Get the parameters, or coefficients.
+arma::vec parameters = lr.Parameters();
+```
+
+### Setting a model
+
+Assuming you already have a model and do not need to create one, this is how
+you would set the parameters for a `LinearRegression` instance.
+
+```c++
+arma::vec parameters; // Your model.
+
+LinearRegression lr; // Create a new LinearRegression instance or reuse one.
+lr.Parameters() = parameters; // Set the model.
+```
+
+### Load a model from file
+
+If you have a generated model in a file somewhere you would like to load and
+use, you can use `data::Load()` to load it.
+
+```c++
+std::string filename; // The path and name of your file.
+
+LinearRegression lr;
+data::Load(filename, "lr_model", lr);
+```
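+
+Conversely, a trained model can be saved to disk with `data::Save()`; a minimal
+sketch (the filename is arbitrary, and the format is inferred from the
+extension, e.g. `.xml`, `.json`, or `.bin`):
+
+```c++
+// Save the trained model under the name "lr_model".
+data::Save("lr.xml", "lr_model", lr);
+```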
+
+### Prediction
+
+Once you have generated or loaded a model using one of the aforementioned
+methods, you can predict values for a dataset.
+
+```c++
+LinearRegression lr;
+// Load or generate your model.
+
+// The dataset we want to predict on; each column is a data point.
+arma::mat points;
+// This will store the predictions; one element for each point.
+arma::vec predictions;
+
+lr.Predict(points, predictions); // Predict.
+
+// Now, the vector 'predictions' will contain the predicted values.
+```
+
+### Setting lambda for ridge regression
+
+As discussed in the earlier ridge regression example, ridge regression is
+useful when the covariance matrix of the predictors is not invertible.  The
+standard constructor can be used to set a value of lambda:
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+arma::mat data; // The dataset itself.
+arma::vec responses; // The responses, one for each point (column) in data.
+
+// Regress, with a lambda of 0.5.
+LinearRegression lr(data, responses, 0.5);
+
+// Get the parameters, or coefficients.
+arma::vec parameters = lr.Parameters();
+```
+
+In addition, the `Lambda()` function can be used to get or modify the lambda
+value:
+
+```c++
+LinearRegression lr;
+lr.Lambda() = 0.5;
+Log::Info << "Lambda is " << lr.Lambda() << "." << std::endl;
+```
+
+## Further documentation
+
+For further documentation on the `LinearRegression` class, consult the
+comments in the source code, found in `mlpack/methods/linear_regression/`.
diff -pruN 3.4.2-7/doc/tutorials/neighbor_search/neighbor_search.txt 4.0.1-1/doc/tutorials/neighbor_search/neighbor_search.txt
--- 3.4.2-7/doc/tutorials/neighbor_search/neighbor_search.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/neighbor_search/neighbor_search.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,460 +0,0 @@
-/*!
-
-@file neighbor_search.txt
-@author Ryan Curtin
-@brief Tutorial for how to use the NeighborSearch class.
-
-@page nstutorial NeighborSearch tutorial (k-nearest-neighbors)
-
-@section intro_nstut Introduction
-
-Nearest-neighbors search is a common machine learning task.  In this setting, we
-have a \b query and a \b reference dataset.  For each point in the \b query
-dataset, we wish to know the \f$k\f$ points in the \b reference dataset which
-are closest to the given query point.
-
-Alternately, if the query and reference datasets are the same, the problem can
-be stated more simply: for each point in the dataset, we wish to know the
-\f$k\f$ nearest points to that point.
-
-\b mlpack provides:
-
- - a \ref cli_nstut "simple command-line executable" to run nearest-neighbors search
-   (and furthest-neighbors search)
- - a \ref knn_nstut "simple C++ interface" to perform nearest-neighbors search (and
-   furthest-neighbors search)
- - a \ref neighborsearch_nstut "generic, extensible, and powerful C++ class (NeighborSearch)" for complex usage
-
-@section toc_nstut Table of Contents
-
-A list of all the sections this tutorial contains.
-
- - \ref intro_nstut
- - \ref toc_nstut
- - \ref cli_nstut
-   - \ref cli_ex1_nstut
-   - \ref cli_ex2_nstut
-   - \ref cli_ex3_nstut
- - \ref knn_nstut
-   - \ref knn_ex1_nstut
-   - \ref knn_ex2_nstut
-   - \ref knn_ex3_nstut
- - \ref neighborsearch_nstut
-   - \ref sort_policy_doc_nstut
-   - \ref metric_type_doc_nstut
-   - \ref mat_type_doc_nstut
-   - \ref tree_type_doc_nstut
-   - \ref traverser_type_doc_nstut
- - \ref further_doc_nstut
-
-@section cli_nstut Command-Line 'mlpack_knn'
-
-The simplest way to perform nearest-neighbors search in \b mlpack is to use the
-\c mlpack_knn executable.  This program will perform nearest-neighbors search
-and place the resultant neighbors into one file and the resultant distances into
-another.  The output files are organized such that the first row corresponds to
-the nearest neighbors of the first query point, with the first column
-corresponding to the nearest neighbor, and so forth.
-
-Below are several examples of simple usage (and the resultant output).  The
-\c -v option is used so that output is given.  Further documentation on each
-individual option can be found by typing
-
-@code
-$ mlpack_knn --help
-@endcode
-
-@subsection cli_ex1_nstut One dataset, 5 nearest neighbors
-
-@code
-$ mlpack_knn -r dataset.csv -n neighbors_out.csv -d distances_out.csv -k 5 -v
-[INFO ] Loading 'dataset.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Loaded reference data from 'dataset.csv' (3 x 1000).
-[INFO ] Building reference tree...
-[INFO ] Tree built.
-[INFO ] Searching for 5 nearest neighbors with dual-tree kd-tree search...
-[INFO ] 18412 node combinations were scored.
-[INFO ] 54543 base cases were calculated.
-[INFO ] Search complete.
-[INFO ] Saving CSV data to 'neighbors_out.csv'.
-[INFO ] Saving CSV data to 'distances_out.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   distances_file: distances_out.csv
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 5
-[INFO ]   leaf_size: 20
-[INFO ]   naive: false
-[INFO ]   neighbors_file: neighbors_out.csv
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: ""
-[INFO ]   random_basis: false
-[INFO ]   reference_file: dataset.csv
-[INFO ]   seed: 0
-[INFO ]   single_mode: false
-[INFO ]   tree_type: kd
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   computing_neighbors: 0.108968s
-[INFO ]   loading_data: 0.006495s
-[INFO ]   saving_data: 0.003843s
-[INFO ]   total_time: 0.126036s
-[INFO ]   tree_building: 0.003442s
-@endcode
-
-Convenient program timers are given for different parts of the calculation at
-the bottom of the output, as well as the parameters the simulation was run with.
-Now, if we look at the output files:
-
-@code
-$ head neighbors_out.csv
-862,344,224,43,885
-703,499,805,639,450
-867,472,972,380,601
-397,319,277,443,323
-840,827,865,38,438
-732,876,751,492,616
-563,222,569,985,940
-361,97,928,437,79
-547,695,419,961,716
-982,113,689,843,634
-
-$ head distances_out.csv
-5.986076164057e-02,7.664920518084e-02,1.116050961847e-01,1.155595474371e-01,1.169810085522e-01
-7.532635022982e-02,1.012564715841e-01,1.127846944644e-01,1.209584396720e-01,1.216543647014e-01
-7.659571546879e-02,1.014588981948e-01,1.025114621511e-01,1.128082429187e-01,1.131659758673e-01
-2.079405647909e-02,4.710724516732e-02,7.597622408419e-02,9.171977778898e-02,1.037033340864e-01
-7.082206779700e-02,9.002355499742e-02,1.044181406406e-01,1.093149568834e-01,1.139700558608e-01
-5.688056488896e-02,9.478072514474e-02,1.085637706630e-01,1.114177921451e-01,1.139370265105e-01
-7.882260880455e-02,9.454474078041e-02,9.724494179950e-02,1.023829575445e-01,1.066927013814e-01
-7.005321598247e-02,9.131417221561e-02,9.498248889074e-02,9.897964162308e-02,1.121202216165e-01
-5.295654132754e-02,5.509877761894e-02,8.108227366619e-02,9.785461174861e-02,1.043968140367e-01
-3.992859920333e-02,4.471418646159e-02,7.346053904990e-02,9.181982339584e-02,9.843075910782e-02
-@endcode
-
-So, the nearest neighbor to point 0 is point 862, with a distance of
-5.986076164057e-02.  The second nearest neighbor to point 0 is point 344, with a
-distance of 7.664920518084e-02.  The third nearest neighbor to point 5 is point
-751, with a distance of 1.085637706630e-01.
-
-@subsection cli_ex2_nstut Query and reference dataset, 10 nearest neighbors
-
-@code
-$ mlpack_knn -q query_dataset.csv -r reference_dataset.csv \
-> -n neighbors_out.csv -d distances_out.csv -k 10 -v
-[INFO ] Loading 'reference_dataset.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Loaded reference data from 'reference_dataset.csv' (3 x 1000).
-[INFO ] Building reference tree...
-[INFO ] Tree built.
-[INFO ] Loading 'query_dataset.csv' as CSV data.  Size is 3 x 50.
-[INFO ] Loaded query data from 'query_dataset.csv' (3x50).
-[INFO ] Searching for 10 nearest neighbors with dual-tree kd-tree search...
-[INFO ] Building query tree...
-[INFO ] Tree built.
-[INFO ] Search complete.
-[INFO ] Saving CSV data to 'neighbors_out.csv'.
-[INFO ] Saving CSV data to 'distances_out.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   distances_file: distances_out.csv
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 10
-[INFO ]   leaf_size: 20
-[INFO ]   naive: false
-[INFO ]   neighbors_file: neighbors_out.csv
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: query_dataset.csv
-[INFO ]   random_basis: false
-[INFO ]   reference_file: reference_dataset.csv
-[INFO ]   seed: 0
-[INFO ]   single_mode: false
-[INFO ]   tree_type: kd
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   computing_neighbors: 0.022589s
-[INFO ]   loading_data: 0.003572s
-[INFO ]   saving_data: 0.000755s
-[INFO ]   total_time: 0.032197s
-[INFO ]   tree_building: 0.002590s
-@endcode
-
-@subsection cli_ex3_nstut One dataset, 3 nearest neighbors, leaf size of 15 points
-
-@code
-$ mlpack_knn -r dataset.csv -n neighbors_out.csv -d distances_out.csv -k 3 -l 15 -v
-[INFO ] Loading 'dataset.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Loaded reference data from 'dataset.csv' (3 x 1000).
-[INFO ] Building reference tree...
-[INFO ] Tree built.
-[INFO ] Searching for 3 nearest neighbors with dual-tree kd-tree search...
-[INFO ] 19692 node combinations were scored.
-[INFO ] 36263 base cases were calculated.
-[INFO ] Search complete.
-[INFO ] Saving CSV data to 'neighbors_out.csv'.
-[INFO ] Saving CSV data to 'distances_out.csv'.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   distances_file: distances_out.csv
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   k: 3
-[INFO ]   leaf_size: 15
-[INFO ]   naive: false
-[INFO ]   neighbors_file: neighbors_out.csv
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: ""
-[INFO ]   random_basis: false
-[INFO ]   reference_file: dataset.csv
-[INFO ]   seed: 0
-[INFO ]   single_mode: false
-[INFO ]   tree_type: kd
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   computing_neighbors: 0.059020s
-[INFO ]   loading_data: 0.002791s
-[INFO ]   saving_data: 0.002369s
-[INFO ]   total_time: 0.069277s
-[INFO ]   tree_building: 0.002713s
-@endcode
-
-Further documentation on options should be found by using the --help option.
-
-@section knn_nstut The 'KNN' class
-
-The 'KNN' class is, specifically, a typedef of the more extensible
-NeighborSearch class, querying for nearest neighbors using the Euclidean
-distance.
-
-@code
-typedef NeighborSearch<NearestNeighborSort, metric::EuclideanDistance>
-    KNN;
-@endcode
-
-Using the KNN class is particularly simple; first, the object must be
-constructed and given a dataset.  Then, the method is run, and two matrices are
-returned: one which holds the indices of the nearest neighbors, and one which
-holds the distances of the nearest neighbors.  These are of the same structure
-as the output --neighbors_file and --distances_file for the CLI interface (see
-above).  A handful of examples of simple usage of the KNN class are given
-below.
-
-@subsection knn_ex1_nstut 5 nearest neighbors on a single dataset
-
-@code
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace mlpack::neighbor;
-
-// Our dataset matrix, which is column-major.
-extern arma::mat data;
-
-KNN a(data);
-
-// The matrices we will store output in.
-arma::Mat<size_t> resultingNeighbors;
-arma::mat resultingDistances;
-
-a.Search(5, resultingNeighbors, resultingDistances);
-@endcode
-
-The output of the search is stored in resultingNeighbors and resultingDistances.
-
-@subsection knn_ex2_nstut 10 nearest neighbors on a query and reference dataset
-
-@code
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace mlpack::neighbor;
-
-// Our dataset matrices, which are column-major.
-extern arma::mat queryData, referenceData;
-
-KNN a(referenceData);
-
-// The matrices we will store output in.
-arma::Mat<size_t> resultingNeighbors;
-arma::mat resultingDistances;
-
-a.Search(queryData, 10, resultingNeighbors, resultingDistances);
-@endcode
-
-@subsection knn_ex3_nstut Naive (exhaustive) search for 6 nearest neighbors on one dataset
-
-This example uses the O(n^2) naive search (not the tree-based search).
-
-@code
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace mlpack::neighbor;
-
-// Our dataset matrix, which is column-major.
-extern arma::mat dataset;
-
-KNN a(dataset, true);
-
-// The matrices we will store output in.
-arma::Mat<size_t> resultingNeighbors;
-arma::mat resultingDistances;
-
-a.Search(6, resultingNeighbors, resultingDistances);
-@endcode
-
-Needless to say, naive search can be very slow...
-
-@section neighborsearch_nstut The extensible 'NeighborSearch' class
-
-The NeighborSearch class is very extensible, having the following template
-arguments:
-
-@code
-template<
-  typename SortPolicy = NearestNeighborSort,
-  typename MetricType = mlpack::metric::EuclideanDistance,
-  typename MatType = arma::mat,
-  template<typename TreeMetricType,
-           typename TreeStatType,
-           typename TreeMatType> class TreeType = tree::KDTree,
-  template<typename RuleType> class TraversalType =
-      TreeType<MetricType, NeighborSearchStat<SortPolicy>,
-               MatType>::template DualTreeTraverser>
->
-class NeighborSearch;
-@endcode
-
-By choosing different components for each of these template classes, a very
-arbitrary neighbor searching object can be constructed.  Note that each of these
-template parameters have defaults, so it is not necessary to specify each one.
-
-@subsection sort_policy_doc_nstut SortPolicy policy class
-
-The SortPolicy template parameter allows specification of how the NeighborSearch
-object will decide which points are to be searched for.  The
-mlpack::neighbor::NearestNeighborSort class is a well-documented example.  A
-custom SortPolicy class must implement the same methods which
-NearestNeighborSort does:
-
-@code
-static size_t SortDistance(const arma::vec& list, double newDistance);
-
-static bool IsBetter(const double value, const double ref);
-
-template<typename TreeType>
-static double BestNodeToNodeDistance(const TreeType* queryNode,
-                                     const TreeType* referenceNode);
-
-template<typename TreeType>
-static double BestPointToNodeDistance(const arma::vec& queryPoint,
-                                      const TreeType* referenceNode);
-
-static const double WorstDistance();
-
-static const double BestDistance();
-@endcode
-
-The mlpack::neighbor::FurthestNeighborSort class is another implementation,
-which is used to create the 'KFN' typedef class, which finds the furthest
-neighbors, as opposed to the nearest neighbors.
-
-@subsection metric_type_doc_nstut MetricType policy class
-
-The MetricType policy class allows the neighbor search to take place in any
-arbitrary metric space.  The mlpack::metric::LMetric class is a good example
-implementation.  A MetricType class must provide the following functions:
-
-@code
-// Empty constructor is required.
-MetricType();
-
-// Compute the distance between two points.
-template<typename VecType>
-double Evaluate(const VecType& a, const VecType& b);
-@endcode
-
-Internally, the NeighborSearch class keeps an instantiated MetricType class
-(which can be given in the constructor).   This is useful for a metric like the
-Mahalanobis distance (mlpack::metric::MahalanobisDistance), which must store
-state (the covariance matrix).  Therefore, you can write a non-static MetricType
-class and use it seamlessly with NeighborSearch.
-
-For more information on the MetricType policy, see the documentation
-\ref metrics "here".
-
-@subsection mat_type_doc_nstut MatType policy class
-
-The MatType template parameter specifies the type of data matrix used.  This
-type must implement the same operations as an Armadillo matrix, and so standard
-choices are @c arma::mat and @c arma::sp_mat.
-
-@subsection tree_type_doc_nstut TreeType policy class
-
-The NeighborSearch class allows great extensibility in the selection of the type
-of tree used for search.  This type must follow the typical mlpack TreeType
-policy, documented \ref trees "here".
-
-Typical choices might include mlpack::tree::KDTree, mlpack::tree::BallTree,
-mlpack::tree::StandardCoverTree, mlpack::tree::RTree, or
-mlpack::tree::RStarTree.  It is easily possible to make your own tree type for
-use with NeighborSearch; consult the \ref trees "TreeType documentation" for
-more details.
-
-An example of using the NeighborSearch class with a ball tree is given below.
-
-@code
-// Construct a NeighborSearch object with ball bounds.
-NeighborSearch<
-    NearestNeighborSort,
-    metric::EuclideanDistance,
-    arma::mat,
-    tree::BallTree
-> neighborSearch(dataset);
-@endcode
-
-@subsection traverser_type_doc_nstut TraverserType policy class
-
-The last template parameter the NeighborSearch class offers is the TraverserType
-class.  The TraverserType class holds the strategy used to traverse the trees in
-either single-tree or dual-tree search mode.  By default, it is set to use the
-default traverser of the given @c TreeType (which is the member @c
-TreeType::DualTreeTraverser).
-
-This class must implement the following two methods:
-
-@code
-// Instantiate with a given RuleType.
-TraverserType(RuleType& rule);
-
-// Traverse with two trees.
-void Traverse(TreeType& queryNode, TreeType& referenceNode);
-@endcode
-
-The RuleType class provides the following functions for use in the traverser:
-
-@code
-// Evaluate the base case between two points.
-double BaseCase(const size_t queryIndex, const size_t referenceIndex);
-
-// Score the two nodes to see if they can be pruned, returning DBL_MAX if they
-// can be pruned.
-double Score(TreeType& queryNode, TreeType& referenceNode);
-@endcode
-
-Note also that any traverser given must satisfy the definition of a pruning
-dual-tree traversal given in the paper "Tree-independent dual-tree algorithms".
-
-@section further_doc_nstut Further documentation
-
-For further documentation on the NeighborSearch class, consult the
-\ref mlpack::neighbor::NeighborSearch "complete API documentation".
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/neighbor_search.md 4.0.1-1/doc/tutorials/neighbor_search.md
--- 3.4.2-7/doc/tutorials/neighbor_search.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/neighbor_search.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,432 @@
+# NeighborSearch tutorial (k-nearest-neighbors)
+
+Nearest-neighbors search is a common machine learning task.  In this setting, we
+have a *query* and a *reference* dataset.  For each point in the *query*
+dataset, we wish to know the `k` points in the *reference* dataset which are
+closest to the given query point.
+
+Alternately, if the query and reference datasets are the same, the problem can
+be stated more simply: for each point in the dataset, we wish to know the `k`
+nearest points to that point.
+
+mlpack provides:
+
+ - a simple command-line executable to run nearest-neighbors search (and
+   furthest-neighbors search)
+ - a simple C++ interface to perform nearest-neighbors search (and
+   furthest-neighbors search)
+ - a generic, extensible, and powerful C++ class (`NeighborSearch`) for complex
+   usage
+
+## Command-line `mlpack_knn`
+
+The simplest way to perform nearest-neighbors search in mlpack is to use the
+`mlpack_knn` executable.  *(Note that mlpack also provides bindings to other
+languages; e.g., the `knn()` function is available in Python and Julia with
+the same options.  Any example here can therefore be readily adapted to
+another language that mlpack provides bindings for.)*
+
+The `mlpack_knn` program will perform nearest-neighbors search and place the
+resultant neighbors into one file and the resultant distances into another.  The
+output files are organized such that the first row corresponds to the nearest
+neighbors of the first query point, with the first column corresponding to the
+nearest neighbor, and so forth.
+
+Below are several examples of simple usage (and the resultant output).  The `-v`
+option is used so that output is given.  Further documentation on each
+individual option can be found by typing
+
+```sh
+$ mlpack_knn --help
+```
+
+### One dataset, 5 nearest neighbors
+
+```sh
+$ mlpack_knn -r dataset.csv -n neighbors_out.csv -d distances_out.csv -k 5 -v
+[INFO ] Loading 'dataset.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Loaded reference data from 'dataset.csv' (3 x 1000).
+[INFO ] Building reference tree...
+[INFO ] Tree built.
+[INFO ] Searching for 5 nearest neighbors with dual-tree kd-tree search...
+[INFO ] 18412 node combinations were scored.
+[INFO ] 54543 base cases were calculated.
+[INFO ] Search complete.
+[INFO ] Saving CSV data to 'neighbors_out.csv'.
+[INFO ] Saving CSV data to 'distances_out.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   distances_file: distances_out.csv
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 5
+[INFO ]   leaf_size: 20
+[INFO ]   naive: false
+[INFO ]   neighbors_file: neighbors_out.csv
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: ""
+[INFO ]   random_basis: false
+[INFO ]   reference_file: dataset.csv
+[INFO ]   seed: 0
+[INFO ]   single_mode: false
+[INFO ]   tree_type: kd
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   computing_neighbors: 0.108968s
+[INFO ]   loading_data: 0.006495s
+[INFO ]   saving_data: 0.003843s
+[INFO ]   total_time: 0.126036s
+[INFO ]   tree_building: 0.003442s
+```
+
+Convenient program timers are given for different parts of the calculation at
+the bottom of the output, as well as the parameters the program was run with.
+Now, if we look at the output files:
+
+```sh
+$ head neighbors_out.csv
+862,344,224,43,885
+703,499,805,639,450
+867,472,972,380,601
+397,319,277,443,323
+840,827,865,38,438
+732,876,751,492,616
+563,222,569,985,940
+361,97,928,437,79
+547,695,419,961,716
+982,113,689,843,634
+
+$ head distances_out.csv
+5.986076164057e-02,7.664920518084e-02,1.116050961847e-01,1.155595474371e-01,1.169810085522e-01
+7.532635022982e-02,1.012564715841e-01,1.127846944644e-01,1.209584396720e-01,1.216543647014e-01
+7.659571546879e-02,1.014588981948e-01,1.025114621511e-01,1.128082429187e-01,1.131659758673e-01
+2.079405647909e-02,4.710724516732e-02,7.597622408419e-02,9.171977778898e-02,1.037033340864e-01
+7.082206779700e-02,9.002355499742e-02,1.044181406406e-01,1.093149568834e-01,1.139700558608e-01
+5.688056488896e-02,9.478072514474e-02,1.085637706630e-01,1.114177921451e-01,1.139370265105e-01
+7.882260880455e-02,9.454474078041e-02,9.724494179950e-02,1.023829575445e-01,1.066927013814e-01
+7.005321598247e-02,9.131417221561e-02,9.498248889074e-02,9.897964162308e-02,1.121202216165e-01
+5.295654132754e-02,5.509877761894e-02,8.108227366619e-02,9.785461174861e-02,1.043968140367e-01
+3.992859920333e-02,4.471418646159e-02,7.346053904990e-02,9.181982339584e-02,9.843075910782e-02
+```
+
+So, the nearest neighbor to point 0 is point 862, with a distance of
+`5.986076164057e-02`.  The second nearest neighbor to point 0 is point 344, with
+a distance of `7.664920518084e-02`.  The third nearest neighbor to point 5 is
+point 751, with a distance of `1.085637706630e-01`.
+
+### Query and reference dataset, 10 nearest neighbors
+
+```sh
+$ mlpack_knn -q query_dataset.csv -r reference_dataset.csv \
+> -n neighbors_out.csv -d distances_out.csv -k 10 -v
+[INFO ] Loading 'reference_dataset.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Loaded reference data from 'reference_dataset.csv' (3 x 1000).
+[INFO ] Building reference tree...
+[INFO ] Tree built.
+[INFO ] Loading 'query_dataset.csv' as CSV data.  Size is 3 x 50.
+[INFO ] Loaded query data from 'query_dataset.csv' (3x50).
+[INFO ] Searching for 10 nearest neighbors with dual-tree kd-tree search...
+[INFO ] Building query tree...
+[INFO ] Tree built.
+[INFO ] Search complete.
+[INFO ] Saving CSV data to 'neighbors_out.csv'.
+[INFO ] Saving CSV data to 'distances_out.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   distances_file: distances_out.csv
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 10
+[INFO ]   leaf_size: 20
+[INFO ]   naive: false
+[INFO ]   neighbors_file: neighbors_out.csv
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: query_dataset.csv
+[INFO ]   random_basis: false
+[INFO ]   reference_file: reference_dataset.csv
+[INFO ]   seed: 0
+[INFO ]   single_mode: false
+[INFO ]   tree_type: kd
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   computing_neighbors: 0.022589s
+[INFO ]   loading_data: 0.003572s
+[INFO ]   saving_data: 0.000755s
+[INFO ]   total_time: 0.032197s
+[INFO ]   tree_building: 0.002590s
+```
+
+### One dataset, 3 nearest neighbors, leaf size of 15 points
+
+```sh
+$ mlpack_knn -r dataset.csv -n neighbors_out.csv -d distances_out.csv -k 3 -l 15 -v
+[INFO ] Loading 'dataset.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Loaded reference data from 'dataset.csv' (3 x 1000).
+[INFO ] Building reference tree...
+[INFO ] Tree built.
+[INFO ] Searching for 3 nearest neighbors with dual-tree kd-tree search...
+[INFO ] 19692 node combinations were scored.
+[INFO ] 36263 base cases were calculated.
+[INFO ] Search complete.
+[INFO ] Saving CSV data to 'neighbors_out.csv'.
+[INFO ] Saving CSV data to 'distances_out.csv'.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   distances_file: distances_out.csv
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   k: 3
+[INFO ]   leaf_size: 15
+[INFO ]   naive: false
+[INFO ]   neighbors_file: neighbors_out.csv
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: ""
+[INFO ]   random_basis: false
+[INFO ]   reference_file: dataset.csv
+[INFO ]   seed: 0
+[INFO ]   single_mode: false
+[INFO ]   tree_type: kd
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   computing_neighbors: 0.059020s
+[INFO ]   loading_data: 0.002791s
+[INFO ]   saving_data: 0.002369s
+[INFO ]   total_time: 0.069277s
+[INFO ]   tree_building: 0.002713s
+```
+
+Further documentation on options can be found by using the `--help` option.
+
+## The `KNN` class
+
+The `KNN` class is a typedef of the more extensible `NeighborSearch` class,
+configured to search for nearest neighbors using the Euclidean distance.
+
+```c++
+typedef NeighborSearch<NearestNeighborSort, EuclideanDistance> KNN;
+```
+
+Using the `KNN` class is particularly simple; first, the object must be
+constructed and given a dataset.  Then, the `Search()` method is run, and two
+matrices are returned: one which holds the indices of the nearest neighbors,
+and one which holds the distances of the nearest neighbors.  These have the
+same structure as the output `--neighbors_file` and `--distances_file` for the
+command-line program (see above).  A handful of examples of simple usage of
+the `KNN` class are given below.
+
+### 5 nearest neighbors on a single dataset
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// Our dataset matrix, which is column-major.
+extern arma::mat data;
+
+KNN a(data);
+
+// The matrices we will store output in.
+arma::Mat<size_t> resultingNeighbors;
+arma::mat resultingDistances;
+
+a.Search(5, resultingNeighbors, resultingDistances);
+```
+
+The output of the search is stored in `resultingNeighbors` and
+`resultingDistances`.
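+
+In memory, each column of these matrices corresponds to one query point and
+each row to one of the `k` neighbors.  As a small illustrative sketch,
+continuing the example above, the results could be inspected like this:
+
+```c++
+// Print the nearest neighbor of each point in the dataset.
+for (size_t i = 0; i < resultingNeighbors.n_cols; ++i)
+{
+  std::cout << "Nearest neighbor of point " << i << " is point "
+      << resultingNeighbors(0, i) << ", at distance "
+      << resultingDistances(0, i) << "." << std::endl;
+}
+```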
+
+### 10 nearest neighbors on a query and reference dataset
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// Our dataset matrices, which are column-major.
+extern arma::mat queryData, referenceData;
+
+KNN a(referenceData);
+
+// The matrices we will store output in.
+arma::Mat<size_t> resultingNeighbors;
+arma::mat resultingDistances;
+
+a.Search(queryData, 10, resultingNeighbors, resultingDistances);
+```
+
+### Naive (exhaustive) search for 6 nearest neighbors on one dataset
+
+This example uses the `O(n^2)` naive search (not the tree-based search).
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// Our dataset matrix, which is column-major.
+extern arma::mat dataset;
+
+KNN a(dataset, true);
+
+// The matrices we will store output in.
+arma::Mat<size_t> resultingNeighbors;
+arma::mat resultingDistances;
+
+a.Search(6, resultingNeighbors, resultingDistances);
+```
+
+Needless to say, naive search can be very slow...
+
+## The extensible `NeighborSearch` class
+
+The `NeighborSearch` class is very extensible, having the following template
+arguments:
+
+```c++
+template<
+  typename SortPolicy = NearestNeighborSort,
+  typename MetricType = EuclideanDistance,
+  typename MatType = arma::mat,
+  template<typename TreeMetricType,
+           typename TreeStatType,
+           typename TreeMatType> class TreeType = KDTree,
+  template<typename RuleType> class TraversalType =
+      TreeType<MetricType, NeighborSearchStat<SortPolicy>,
+               MatType>::template DualTreeTraverser
+>
+class NeighborSearch;
+```
+
+By choosing different components for each of these template parameters, a wide
+variety of neighbor search objects can be constructed.  Note that each of
+these template parameters has a default, so it is not necessary to specify
+each one.
+
+### `SortPolicy` policy class
+
+The `SortPolicy` template parameter allows specification of how the
+`NeighborSearch` object will decide which points are to be searched for.  The
+`NearestNeighborSort` class is a well-documented example.  A custom `SortPolicy`
+class must implement the same methods which `NearestNeighborSort` does:
+
+```c++
+static size_t SortDistance(const arma::vec& list, double newDistance);
+
+static bool IsBetter(const double value, const double ref);
+
+template<typename TreeType>
+static double BestNodeToNodeDistance(const TreeType* queryNode,
+                                     const TreeType* referenceNode);
+
+template<typename TreeType>
+static double BestPointToNodeDistance(const arma::vec& queryPoint,
+                                      const TreeType* referenceNode);
+
+static const double WorstDistance();
+
+static const double BestDistance();
+```
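+
+To make the interface concrete, below is a minimal sketch of a custom
+`SortPolicy` that behaves like `NearestNeighborSort` (smaller distances are
+better).  The class name is illustrative, and the tree calls (`MinDistance()`)
+are assumptions based on mlpack's usual tree API:
+
+```c++
+class MyNearestSort
+{
+ public:
+  // Return the position in the sorted list where newDistance belongs, or
+  // SIZE_MAX if it is worse than every element in the list.
+  static size_t SortDistance(const arma::vec& list, double newDistance)
+  {
+    for (size_t i = 0; i < list.n_elem; ++i)
+      if (newDistance < list[i])
+        return i;
+    return SIZE_MAX;
+  }
+
+  // Smaller distances are better for nearest neighbor search.
+  static bool IsBetter(const double value, const double ref)
+  { return value < ref; }
+
+  // The best conceivable distance between two nodes is the minimum distance
+  // between their bounds.
+  template<typename TreeType>
+  static double BestNodeToNodeDistance(const TreeType* queryNode,
+                                       const TreeType* referenceNode)
+  { return queryNode->MinDistance(*referenceNode); }
+
+  template<typename TreeType>
+  static double BestPointToNodeDistance(const arma::vec& queryPoint,
+                                        const TreeType* referenceNode)
+  { return referenceNode->MinDistance(queryPoint); }
+
+  static const double WorstDistance() { return DBL_MAX; }
+  static const double BestDistance() { return 0.0; }
+};
+```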
+
+The `FurthestNeighborSort` class is another implementation, which is used to
+create the `KFN` typedef class, which finds the furthest neighbors, as opposed
+to the nearest neighbors.
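+
+That typedef looks just like the `KNN` typedef shown earlier, with only the
+sort policy swapped:
+
+```c++
+typedef NeighborSearch<FurthestNeighborSort, EuclideanDistance> KFN;
+```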
+
+### `MetricType` policy class
+
+The `MetricType` policy class allows the neighbor search to take place in any
+arbitrary metric space.  The `LMetric` class is a good example implementation.
+A `MetricType` class must provide the following functions:
+
+```c++
+// Empty constructor is required.
+MetricType();
+
+// Compute the distance between two points.
+template<typename VecType>
+double Evaluate(const VecType& a, const VecType& b);
+```
+
+Internally, the `NeighborSearch` class keeps an instantiated `MetricType` class
+(which can be given in the constructor).  This is useful for a metric like the
+Mahalanobis distance (`MahalanobisDistance`), which must store state (the
+covariance matrix).  Therefore, you can write a non-static `MetricType` class
+and use it seamlessly with `NeighborSearch`.
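+
+As a minimal sketch, a custom `MetricType` implementing the Manhattan (L1)
+distance could look like the following (the class name is illustrative;
+mlpack already provides this metric as `ManhattanDistance`):
+
+```c++
+class MyManhattanDistance
+{
+ public:
+  // The empty constructor is required.
+  MyManhattanDistance() { }
+
+  // Compute the L1 distance between two points.
+  template<typename VecType>
+  double Evaluate(const VecType& a, const VecType& b)
+  {
+    return arma::accu(arma::abs(a - b));
+  }
+};
+```
+
+This class could then be given as the `MetricType` template parameter of
+`NeighborSearch`.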
+
+For more information on the `MetricType` policy, see the [documentation for
+`MetricType`s](../developer/metrics.md).
+
+### `MatType` policy class
+
+The `MatType` template parameter specifies the type of data matrix used.  This
+type must implement the same operations as an Armadillo matrix, and so standard
+choices are `arma::mat` and `arma::sp_mat`.
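+
+For example, running nearest neighbor search on sparse data is, in principle,
+just a matter of swapping the matrix type.  A sketch, assuming the data is
+held in an Armadillo sparse matrix:
+
+```c++
+extern arma::sp_mat sparseData;
+
+NeighborSearch<NearestNeighborSort, EuclideanDistance, arma::sp_mat>
+    sparseSearch(sparseData);
+```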
+
+### `TreeType` policy class
+
+The `NeighborSearch` class allows great extensibility in the selection of the
+type of tree used for search.  This type must follow the typical mlpack
+`TreeType` policy, documented [here](../developer/trees.md).
+
+Typical choices might include `KDTree`, `BallTree`, `StandardCoverTree`,
+`RTree`, or `RStarTree`.  It is easy to make your own tree type for use with
+`NeighborSearch`; consult the [TreeType
+documentation](../developer/trees.md) for more details.
+
+An example of using the `NeighborSearch` class with a ball tree is given below.
+
+```c++
+// Construct a NeighborSearch object with ball bounds.
+NeighborSearch<
+    NearestNeighborSort,
+    EuclideanDistance,
+    arma::mat,
+    BallTree
+> neighborSearch(dataset);
+```
+
+### `TraverserType` policy class
+
+The last template parameter the `NeighborSearch` class offers is the
+`TraverserType` class (named `TraversalType` in the template signature above).
+The `TraverserType` class holds the strategy used to traverse the trees in
+either single-tree or dual-tree search mode.  By default, it is set to use the
+default traverser of the given `TreeType` (which is the member
+`TreeType::DualTreeTraverser`).
+
+This class must implement the following two methods:
+
+```c++
+// Instantiate with a given RuleType.
+TraverserType(RuleType& rule);
+
+// Traverse with two trees.
+void Traverse(TreeType& queryNode, TreeType& referenceNode);
+```
+
+The `RuleType` class provides the following functions for use in the traverser:
+
+```c++
+// Evaluate the base case between two points.
+double BaseCase(const size_t queryIndex, const size_t referenceIndex);
+
+// Score the two nodes to see if they can be pruned, returning DBL_MAX if they
+// can be pruned.
+double Score(TreeType& queryNode, TreeType& referenceNode);
+```
+
+Note also that any traverser given must satisfy the definition of a pruning
+dual-tree traversal given in the paper "Tree-independent dual-tree algorithms".
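+
+As a rough sketch (not mlpack's actual implementation), a depth-first
+dual-tree traverser satisfying this interface might look like the following.
+The tree methods used (`NumPoints()`, `Point()`, `NumChildren()`, `Child()`)
+are assumptions based on mlpack's usual tree API:
+
+```c++
+template<typename RuleType>
+class SimpleDualTreeTraverser
+{
+ public:
+  SimpleDualTreeTraverser(RuleType& rule) : rule(rule) { }
+
+  template<typename TreeType>
+  void Traverse(TreeType& queryNode, TreeType& referenceNode)
+  {
+    // Run the base case between all pairs of points held in the two nodes.
+    for (size_t i = 0; i < queryNode.NumPoints(); ++i)
+      for (size_t j = 0; j < referenceNode.NumPoints(); ++j)
+        rule.BaseCase(queryNode.Point(i), referenceNode.Point(j));
+
+    // Recurse into each pair of children, unless the pair can be pruned.
+    for (size_t i = 0; i < queryNode.NumChildren(); ++i)
+      for (size_t j = 0; j < referenceNode.NumChildren(); ++j)
+        if (rule.Score(queryNode.Child(i), referenceNode.Child(j)) != DBL_MAX)
+          Traverse(queryNode.Child(i), referenceNode.Child(j));
+  }
+
+ private:
+  RuleType& rule;
+};
+```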
+
+## Further documentation
+
+For further documentation on the `NeighborSearch` class, consult the comments
+in the source code, found in `mlpack/methods/neighbor_search/`.
diff -pruN 3.4.2-7/doc/tutorials/range_search/range_search.txt 4.0.1-1/doc/tutorials/range_search/range_search.txt
--- 3.4.2-7/doc/tutorials/range_search/range_search.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/range_search/range_search.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,402 +0,0 @@
-/*!
-
-@file range_search.txt
-@author Ryan Curtin
-@brief Tutorial for how to use the RangeSearch class.
-
-@page rstutorial RangeSearch tutorial (mlpack_range_search)
-
-@section intro_rstut Introduction
-
-Range search is a simple machine learning task which aims to find all the
-neighbors of a point that fall into a certain range of distances.  In this
-setting, we have a \b query and a \b reference dataset.  Given a certain range,
-for each point in the \b query dataset, we wish to know all points in the \b
-reference dataset which have distances within that given range to the given
-query point.
-
-Alternately, if the query and reference datasets are the same, the problem can
-be stated more simply: for each point in the dataset, we wish to know all points
-which have distance in the given range to that point.
-
-\b mlpack provides:
-
- - a \ref cli_rstut "simple command-line executable" to run range search
- - a \ref rs_rstut "simple C++ interface" to perform range search
- - a \ref rs_ext_rstut "generic, extensible, and powerful C++ class (RangeSearch)" for complex usage
-
-@section toc_rstut Table of Contents
-
-A list of all the sections this tutorial contains.
-
- - \ref intro_rstut
- - \ref toc_rstut
- - \ref cli_rstut
-   - \ref cli_ex1_rstut
-   - \ref cli_ex2_rstut
-   - \ref cli_ex3_rstut
- - \ref rs_rstut
-   - \ref rs_ex1_rstut
-   - \ref rs_ex2_rstut
-   - \ref rs_ex3_rstut
- - \ref rs_ext_rstut
-   - \ref metric_type_doc_rstut
-   - \ref mat_type_doc_rstut
-   - \ref tree_type_doc_rstut
- - \ref further_doc_rstut
-
-@section cli_rstut The 'mlpack_range_search' command-line executable
-
-\b mlpack provides an executable, \c mlpack_range_search, which can be used to
-perform range searches quickly and simply from the command-line.  This program
-will perform the range search and place the resulting neighbor index list into
-one file and their corresponding distances into another file.  These files are
-organized such that the first row corresponds to the neighbors (or distances) of
-the first query point, and the second row corresponds to the neighbors (or
-distances) of the second query point, and so forth.  The neighbors of a specific
-point are not arranged in any specific order.
-
-Because a range search may return different numbers of points (including zero),
-the output file is technically not a valid CSV and may not be loadable by other
-programs.  Therefore, if you need the results in a certain format, it may be
-better to use the \ref rs_rstut "C++ interface" to manually export the data in
-the preferred format.
-
-Below are several examples of simple usage (and the resultant output).  The '-v'
-option is used so that output is given.  Further documentation on each
-individual option can be found by typing
-
-@code
-$ mlpack_range_search --help
-@endcode
-
-@subsection cli_ex1_rstut One dataset, points with distance <= 0.01
-
-@code
-$ mlpack_range_search -r dataset.csv -n neighbors_out.csv -d distances_out.csv \
-> -U 0.076 -v
-[INFO ] Loading 'dataset.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Loaded reference data from 'dataset.csv' (3x1000).
-[INFO ] Building reference tree...
-[INFO ] Tree built.
-[INFO ] Search for points in the range [0, 0.076] with dual-tree kd-tree
-search...
-[INFO ] Search complete.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   distances_file: distances_out.csv
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   leaf_size: 20
-[INFO ]   max: 0.01
-[INFO ]   min: 0
-[INFO ]   naive: false
-[INFO ]   neighbors_file: neighbors_out.csv
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: ""
-[INFO ]   random_basis: false
-[INFO ]   reference_file: dataset.csv
-[INFO ]   seed: 0
-[INFO ]   single_mode: false
-[INFO ]   tree_type: kd
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   loading_data: 0.005201s
-[INFO ]   range_search/computing_neighbors: 0.017110s
-[INFO ]   total_time: 0.033313s
-[INFO ]   tree_building: 0.002500s
-@endcode
-
-Convenient program timers are given for different parts of the calculation at
-the bottom of the output, as well as the parameters the simulation was run with.
-Now, if we look at the output files:
-
-@code
-$ head neighbors_out.csv
-862
-703
-
-397, 277, 319
-840
-732
-
-361
-547, 695
-113, 982, 689
-
-$ head distances_out.csv
-0.0598608
-0.0753264
-
-0.0207941, 0.0759762, 0.0471072
-0.0708221
-0.0568806
-
-0.0700532
-0.0529565, 0.0550988
-0.0447142, 0.0399286, 0.0734605
-@endcode
-
-We can see that only point 862 is within distance 0.076 of point 0.  We can
-also see that point 2 has no points within a distance of 0.076 -- that line is
-empty.
-
-@subsection cli_ex2_rstut Query and reference dataset, range [1.0, 1.5]
-
-@code
-$ mlpack_range_search -q query_dataset.csv -r reference_dataset.csv -n \
-> neighbors_out.csv -d distances_out.csv -L 1.0 -U 1.5 -v
-[INFO ] Loading 'reference_dataset.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Loaded reference data from 'reference_dataset.csv' (3x1000).
-[INFO ] Building reference tree...
-[INFO ] Tree built.
-[INFO ] Loading 'query_dataset.csv' as CSV data.  Size is 3 x 50.
-[INFO ] Loaded query data from 'query_dataset.csv' (3x50).
-[INFO ] Search for points in the range [1, 1.5] with dual-tree kd-tree search...
-[INFO ] Building query tree...
-[INFO ] Tree built.
-[INFO ] Search complete.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   distances_file: distances_out.csv
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   leaf_size: 20
-[INFO ]   max: 1.5
-[INFO ]   min: 1
-[INFO ]   naive: false
-[INFO ]   neighbors_file: neighbors_out.csv
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: query_dataset.csv
-[INFO ]   random_basis: false
-[INFO ]   reference_file: reference_dataset.csv
-[INFO ]   seed: 0
-[INFO ]   single_mode: false
-[INFO ]   tree_type: kd
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   loading_data: 0.006199s
-[INFO ]   range_search/computing_neighbors: 0.024427s
-[INFO ]   total_time: 0.045403s
-[INFO ]   tree_building: 0.003979s
-@endcode
-
-@subsection cli_ex3_rstut One dataset, range [0.7 0.8], leaf size of 15 points
-
-The \b mlpack implementation of range search is a dual-tree algorithm; when
-\f$kd\f$-trees are used, the leaf size of the tree can be changed.  Depending on
-the characteristics of the dataset, a larger or smaller leaf size can provide
-faster computation.  The leaf size is modifiable through the command-line
-interface, as shown below.
-
-@code
-$ mlpack_range_search -r dataset.csv -n neighbors_out.csv -d distances_out.csv \
-> -L 0.7 -U 0.8 -l 15 -v
-[INFO ] Loading 'dataset.csv' as CSV data.  Size is 3 x 1000.
-[INFO ] Loaded reference data from 'dataset.csv' (3x1000).
-[INFO ] Building reference tree...
-[INFO ] Tree built.
-[INFO ] Search for points in the range [0.7, 0.8] with dual-tree kd-tree
-search...
-[INFO ] Search complete.
-[INFO ]
-[INFO ] Execution parameters:
-[INFO ]   distances_file: distances_out.csv
-[INFO ]   help: false
-[INFO ]   info: ""
-[INFO ]   input_model_file: ""
-[INFO ]   leaf_size: 15
-[INFO ]   max: 0.8
-[INFO ]   min: 0.7
-[INFO ]   naive: false
-[INFO ]   neighbors_file: neighbors_out.csv
-[INFO ]   output_model_file: ""
-[INFO ]   query_file: ""
-[INFO ]   random_basis: false
-[INFO ]   reference_file: dataset.csv
-[INFO ]   seed: 0
-[INFO ]   single_mode: false
-[INFO ]   tree_type: kd
-[INFO ]   verbose: true
-[INFO ]   version: false
-[INFO ]
-[INFO ] Program timers:
-[INFO ]   loading_data: 0.006298s
-[INFO ]   range_search/computing_neighbors: 0.411041s
-[INFO ]   total_time: 0.539931s
-[INFO ]   tree_building: 0.004695s
-@endcode
-
-Further documentation on options should be found by using the --help option.
-
-@section rs_rstut The 'RangeSearch' class
-
-The 'RangeSearch' class is an extensible template class which allows a high
-level of flexibility.  However, all of the template arguments have default
-parameters, allowing a user to simply use 'RangeSearch<>' for simple usage
-without worrying about the exact necessary template parameters.
-
-The class bears many similarities to the \ref nstutorial "NeighborSearch" class;
-usage generally consists of calling the constructor with one or two datasets,
-and then calling the 'Search()' method to perform the actual range search.
-
-The 'Search()' method stores the results in two vector-of-vector objects.  This
-is necessary because each query point may have a different number of neighbors
-in the specified distance range.  The structure of those two objects is very
-similar to the output files --neighbors_file and --distances_file for the CLI
-interface (see above).  A handful of examples of simple usage of the RangeSearch
-class are given below.
-
-@subsection rs_ex1_rstut Distance less than 2.0 on a single dataset
-
-@code
-#include <mlpack/methods/range_search/range_search.hpp>
-
-using namespace mlpack::range;
-
-// Our dataset matrix, which is column-major.
-extern arma::mat data;
-
-RangeSearch<> a(data);
-
-// The vector-of-vector objects we will store output in.
-std::vector<std::vector<size_t> > resultingNeighbors;
-std::vector<std::vector<double> > resultingDistances;
-
-// The range we will use.
-math::Range r(0.0, 2.0); // [0.0, 2.0].
-
-a.Search(r, resultingNeighbors, resultingDistances);
-@endcode
-
-The output of the search is stored in resultingNeighbors and resultingDistances.
-
-@subsection rs_ex2_rstut Range [3.0, 4.0] on a query and reference dataset
-
-@code
-#include <mlpack/methods/range_search/range_search.hpp>
-
-using namespace mlpack::range;
-
-// Our dataset matrices, which are column-major.
-extern arma::mat queryData, referenceData;
-
-RangeSearch<> a(referenceData);
-
-// The vector-of-vector objects we will store output in.
-std::vector<std::vector<size_t> > resultingNeighbors;
-std::vector<std::vector<double> > resultingDistances;
-
-// The range we will use.
-math::Range r(3.0, 4.0); // [3.0, 4.0].
-
-a.Search(queryData, r, resultingNeighbors, resultingDistances);
-@endcode
-
-@subsection rs_ex3_rstut Naive (exhaustive) search for distance greater than 5.0 on one dataset
-
-This example uses the O(n^2) naive search (not the tree-based search).
-
-@code
-#include <mlpack/methods/range_search/range_search.hpp>
-
-using namespace mlpack::range;
-
-// Our dataset matrix, which is column-major.
-extern arma::mat dataset;
-
-// The 'true' option indicates that we will use naive calculation.
-RangeSearch<> a(dataset, true);
-
-// The vector-of-vector objects we will store output in.
-std::vector<std::vector<size_t> > resultingNeighbors;
-std::vector<std::vector<double> > resultingDistances;
-
-// The range we will use.  The upper bound is DBL_MAX.
-math::Range r(5.0, DBL_MAX); // [5.0, inf).
-
-a.Search(r, resultingNeighbors, resultingDistances);
-@endcode
-
-Needless to say, naive search can be very slow...
-
-@section rs_ext_rstut The extensible 'RangeSearch' class
-
-Similar to the \ref nstutorial "NeighborSearch class", the RangeSearch class is
-very extensible, having the following template arguments:
-
-@code
-template<typename MetricType = metric::EuclideanDistance,
-         typename MatType = arma::mat,
-         template<typename TreeMetricType,
-                  typename TreeStatType,
-                  typename TreeMatType> class TreeType = tree::KDTree>
-class RangeSearch;
-@endcode
-
-By choosing different components for each of these template classes, a very
-arbitrary range searching object can be constructed.
-
-@subsection metric_type_doc_rstut MetricType policy class
-
-The MetricType policy class allows the range search to take place in any
-arbitrary metric space.  The mlpack::metric::LMetric class is a good example
-implementation.  A MetricType class must provide the following functions:
-
-@code
-// Empty constructor is required.
-MetricType();
-
-// Compute the distance between two points.
-template<typename VecType>
-double Evaluate(const VecType& a, const VecType& b);
-@endcode
-
-Internally, the RangeSearch class keeps an instantiated MetricType class (which
-can be given in the constructor).   This is useful for a metric like the
-Mahalanobis distance (mlpack::metric::MahalanobisDistance), which must store
-state (the covariance matrix).  Therefore, you can write a non-static MetricType
-class and use it seamlessly with RangeSearch.
-
-@subsection mat_type_doc_rstut MatType policy class
-
-The MatType template parameter specifies the type of data matrix used.  This
-type must implement the same operations as an Armadillo matrix, and so standard
-choices are @c arma::mat and @c arma::sp_mat.
-
-@subsection tree_type_doc_rstut TreeType policy class
-
-The RangeSearch class also allows a custom tree to be used.  The TreeType policy
-is also used elsewhere in mlpack and is documented more thoroughly
-\ref trees "here".
-
-Typical choices might include mlpack::tree::KDTree (the default),
-mlpack::tree::BallTree, mlpack::tree::RTree, mlpack::tree::RStarTree,
-or mlpack::tree::StandardCoverTree.  Below is an example that uses the
-RangeSearch class with an R-tree:
-
-@code
-// Construct a RangeSearch object with ball bounds.
-RangeSearch<
-    metric::EuclideanDistance,
-    arma::mat,
-    tree::RTree
-> rangeSearch(dataset);
-@endcode
-
-For further information on trees, including how to write your own tree for use
-with RangeSearch and other mlpack methods, see the
-\ref trees "TreeType policy documentation".
-
-@section further_doc_rstut Further documentation
-
-For further documentation on the RangeSearch class, consult the
-\ref mlpack::range::RangeSearch "complete API documentation".
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/range_search.md 4.0.1-1/doc/tutorials/range_search.md
--- 3.4.2-7/doc/tutorials/range_search.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/range_search.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,377 @@
+# RangeSearch tutorial (`mlpack_range_search`)
+
+Range search is a simple machine learning task which aims to find all the
+neighbors of a point that fall into a certain range of distances.  In this
+setting, we have a *query* and a *reference* dataset.  Given a certain range,
+for each point in the *query* dataset, we wish to know all points in the
+*reference* dataset which have distances within that given range to the given
+query point.
+
+Alternately, if the query and reference datasets are the same, the problem can
+be stated more simply: for each point in the dataset, we wish to know all points
+which have distance in the given range to that point.
+
+mlpack provides:
+
+ - a simple command-line executable to run range search
+ - a simple C++ interface to perform range search
+ - a generic, extensible, and powerful C++ class (`RangeSearch`) for complex
+   usage
+
+## The `mlpack_range_search` command-line executable
+
+mlpack provides a command-line program, `mlpack_range_search`, which can be used
+to perform range searches quickly and simply.  *(Note that, unlike most other
+mlpack methods, range search is not currently available as a binding in other
+languages.)*
+
+The `mlpack_range_search` program will perform the range search and place the
+resulting neighbor index list into one file and their corresponding distances
+into another file.  These files are organized such that the first row
+corresponds to the neighbors (or distances) of the first query point, and the
+second row corresponds to the neighbors (or distances) of the second query
+point, and so forth.  The neighbors of a specific point are not arranged in any
+specific order.
+
+Because a range search may return different numbers of points (including zero),
+the output file is technically not a valid CSV and may not be loadable by other
+programs.  Therefore, if you need the results in a certain format, it may be
+better to use the C++ interface to manually export the data in the preferred
+format.
+
+Below are several examples of simple usage (and the resultant output).  The `-v`
+option is used so that output is given.  Further documentation on each
+individual option can be found by typing
+
+```sh
+$ mlpack_range_search --help
+```
+
+### One dataset, points with distance <= 0.076
+
+```sh
+$ mlpack_range_search -r dataset.csv -n neighbors_out.csv -d distances_out.csv \
+> -U 0.076 -v
+[INFO ] Loading 'dataset.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Loaded reference data from 'dataset.csv' (3x1000).
+[INFO ] Building reference tree...
+[INFO ] Tree built.
+[INFO ] Search for points in the range [0, 0.076] with dual-tree kd-tree
+search...
+[INFO ] Search complete.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   distances_file: distances_out.csv
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   leaf_size: 20
+[INFO ]   max: 0.01
+[INFO ]   min: 0
+[INFO ]   naive: false
+[INFO ]   neighbors_file: neighbors_out.csv
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: ""
+[INFO ]   random_basis: false
+[INFO ]   reference_file: dataset.csv
+[INFO ]   seed: 0
+[INFO ]   single_mode: false
+[INFO ]   tree_type: kd
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   loading_data: 0.005201s
+[INFO ]   range_search/computing_neighbors: 0.017110s
+[INFO ]   total_time: 0.033313s
+[INFO ]   tree_building: 0.002500s
+```
+
+Convenient program timers are given for different parts of the calculation at
+the bottom of the output, as well as the parameters the program was run with.
+Now, if we look at the output files:
+
+```sh
+$ head neighbors_out.csv
+862
+703
+
+397, 277, 319
+840
+732
+
+361
+547, 695
+113, 982, 689
+
+$ head distances_out.csv
+0.0598608
+0.0753264
+
+0.0207941, 0.0759762, 0.0471072
+0.0708221
+0.0568806
+
+0.0700532
+0.0529565, 0.0550988
+0.0447142, 0.0399286, 0.0734605
+```
+
+We can see that only point 862 is within distance 0.076 of point 0.  We can
+also see that point 2 has no points within a distance of 0.076---that line is
+empty.
+
+### Query and reference dataset, range `[1.0, 1.5]`
+
+```sh
+$ mlpack_range_search -q query_dataset.csv -r reference_dataset.csv -n \
+> neighbors_out.csv -d distances_out.csv -L 1.0 -U 1.5 -v
+[INFO ] Loading 'reference_dataset.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Loaded reference data from 'reference_dataset.csv' (3x1000).
+[INFO ] Building reference tree...
+[INFO ] Tree built.
+[INFO ] Loading 'query_dataset.csv' as CSV data.  Size is 3 x 50.
+[INFO ] Loaded query data from 'query_dataset.csv' (3x50).
+[INFO ] Search for points in the range [1, 1.5] with dual-tree kd-tree search...
+[INFO ] Building query tree...
+[INFO ] Tree built.
+[INFO ] Search complete.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   distances_file: distances_out.csv
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   leaf_size: 20
+[INFO ]   max: 1.5
+[INFO ]   min: 1
+[INFO ]   naive: false
+[INFO ]   neighbors_file: neighbors_out.csv
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: query_dataset.csv
+[INFO ]   random_basis: false
+[INFO ]   reference_file: reference_dataset.csv
+[INFO ]   seed: 0
+[INFO ]   single_mode: false
+[INFO ]   tree_type: kd
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   loading_data: 0.006199s
+[INFO ]   range_search/computing_neighbors: 0.024427s
+[INFO ]   total_time: 0.045403s
+[INFO ]   tree_building: 0.003979s
+```
+
+### One dataset, range `[0.7, 0.8]`, leaf size of 15 points
+
+The mlpack implementation of range search is a dual-tree algorithm; when
+`kd`-trees are used, the leaf size of the tree can be changed.  Depending on the
+characteristics of the dataset, a larger or smaller leaf size can provide faster
+computation.  The leaf size is modifiable through the command-line interface, as
+shown below.
+
+```sh
+$ mlpack_range_search -r dataset.csv -n neighbors_out.csv -d distances_out.csv \
+> -L 0.7 -U 0.8 -l 15 -v
+[INFO ] Loading 'dataset.csv' as CSV data.  Size is 3 x 1000.
+[INFO ] Loaded reference data from 'dataset.csv' (3x1000).
+[INFO ] Building reference tree...
+[INFO ] Tree built.
+[INFO ] Search for points in the range [0.7, 0.8] with dual-tree kd-tree
+search...
+[INFO ] Search complete.
+[INFO ]
+[INFO ] Execution parameters:
+[INFO ]   distances_file: distances_out.csv
+[INFO ]   help: false
+[INFO ]   info: ""
+[INFO ]   input_model_file: ""
+[INFO ]   leaf_size: 15
+[INFO ]   max: 0.8
+[INFO ]   min: 0.7
+[INFO ]   naive: false
+[INFO ]   neighbors_file: neighbors_out.csv
+[INFO ]   output_model_file: ""
+[INFO ]   query_file: ""
+[INFO ]   random_basis: false
+[INFO ]   reference_file: dataset.csv
+[INFO ]   seed: 0
+[INFO ]   single_mode: false
+[INFO ]   tree_type: kd
+[INFO ]   verbose: true
+[INFO ]   version: false
+[INFO ]
+[INFO ] Program timers:
+[INFO ]   loading_data: 0.006298s
+[INFO ]   range_search/computing_neighbors: 0.411041s
+[INFO ]   total_time: 0.539931s
+[INFO ]   tree_building: 0.004695s
+```
+
+Further documentation on options can be found by using the `--help` option.
+
+## The `RangeSearch` class
+
+The `RangeSearch` class is an extensible template class which allows a high
+level of flexibility.  However, all of the template arguments have default
+parameters, allowing a user to simply use `RangeSearch<>` for simple usage
+without worrying about the exact necessary template parameters.
+
+The class bears many similarities to the [`NeighborSearch`](neighbor_search.md)
+class; usage generally consists of calling the constructor with one or two
+datasets, and then calling the `Search()` method to perform the actual range
+search.
+
+The `Search()` method stores the results in two vector-of-vector objects.  This
+is necessary because each query point may have a different number of neighbors
+in the specified distance range.  The structure of those two objects is very
+similar to the output files `--neighbors_file` and `--distances_file` for the
+command-line interface (see above).  A handful of examples of simple usage of
+the `RangeSearch` class are given below.
+
+### Distance less than `2.0` on a single dataset
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// Our dataset matrix, which is column-major.
+extern arma::mat data;
+
+RangeSearch<> a(data);
+
+// The vector-of-vector objects we will store output in.
+std::vector<std::vector<size_t> > resultingNeighbors;
+std::vector<std::vector<double> > resultingDistances;
+
+// The range we will use.
+Range r(0.0, 2.0); // [0.0, 2.0].
+
+a.Search(r, resultingNeighbors, resultingDistances);
+```
+
+The output of the search is stored in `resultingNeighbors` and
+`resultingDistances`.
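+
+Because the number of in-range neighbors differs per query point, the results
+are inspected with nested loops rather than matrix indexing.  A small sketch,
+continuing the example above:
+
+```c++
+for (size_t i = 0; i < resultingNeighbors.size(); ++i)
+{
+  for (size_t j = 0; j < resultingNeighbors[i].size(); ++j)
+  {
+    std::cout << "Point " << i << " has neighbor " << resultingNeighbors[i][j]
+        << " at distance " << resultingDistances[i][j] << "." << std::endl;
+  }
+}
+```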
+
+### Range `[3.0, 4.0]` on a query and reference dataset
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// Our dataset matrices, which are column-major.
+extern arma::mat queryData, referenceData;
+
+RangeSearch<> a(referenceData);
+
+// The vector-of-vector objects we will store output in.
+std::vector<std::vector<size_t> > resultingNeighbors;
+std::vector<std::vector<double> > resultingDistances;
+
+// The range we will use.
+Range r(3.0, 4.0); // [3.0, 4.0].
+
+a.Search(queryData, r, resultingNeighbors, resultingDistances);
+```
+
+### Naive (exhaustive) search for distance greater than `5.0` on one dataset
+
+This example uses the `O(n^2)` naive search (not the tree-based search).
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+
+// Our dataset matrix, which is column-major.
+extern arma::mat dataset;
+
+// The 'true' option indicates that we will use naive calculation.
+RangeSearch<> a(dataset, true);
+
+// The vector-of-vector objects we will store output in.
+std::vector<std::vector<size_t> > resultingNeighbors;
+std::vector<std::vector<double> > resultingDistances;
+
+// The range we will use.  The upper bound is DBL_MAX.
+Range r(5.0, DBL_MAX); // [5.0, inf).
+
+a.Search(r, resultingNeighbors, resultingDistances);
+```
+
+Needless to say, naive search can be very slow...
+
+## The extensible `RangeSearch` class
+
+Similar to the [`NeighborSearch` class](neighbor_search.md), the `RangeSearch`
+class is very extensible, having the following template arguments:
+
+```c++
+template<typename MetricType = EuclideanDistance,
+         typename MatType = arma::mat,
+         template<typename TreeMetricType,
+                  typename TreeStatType,
+                  typename TreeMatType> class TreeType = KDTree>
+class RangeSearch;
+```
+
+By choosing different components for each of these template parameters, a wide
+variety of range search objects can be constructed.
+
+### `MetricType` policy class
+
+The `MetricType` policy class allows the range search to take place in any
+arbitrary metric space.  The `LMetric` class is a good example implementation.
+A `MetricType` class must provide the following functions:
+
+```c++
+// Empty constructor is required.
+MetricType();
+
+// Compute the distance between two points.
+template<typename VecType>
+double Evaluate(const VecType& a, const VecType& b);
+```
+
+Internally, the `RangeSearch` class keeps an instantiated `MetricType` class
+(which can be given in the constructor).  This is useful for a metric like the
+Mahalanobis distance (`MahalanobisDistance`), which must store state (the
+covariance matrix).  Therefore, you can write a non-static `MetricType` class
+and use it seamlessly with `RangeSearch`.
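+
+As a sketch of how an instantiated metric might be supplied (treat the exact
+constructor signatures as assumptions to verify against your version of
+mlpack):
+
+```c++
+// A Mahalanobis distance with a given covariance matrix.
+extern arma::mat dataset, covariance;
+MahalanobisDistance<> md(covariance);
+
+// Pass the instantiated metric to the RangeSearch constructor (the two
+// booleans disable naive and single-tree modes).
+RangeSearch<MahalanobisDistance<>> rs(dataset, false, false, md);
+```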
+
+See also the [documentation for the `MetricType`
+policy](../developer/metrics.md).
+
+### `MatType` policy class
+
+The `MatType` template parameter specifies the type of data matrix used.  This
+type must implement the same operations as an Armadillo matrix, and so standard
+choices are `arma::mat` and `arma::sp_mat`.
+
+### `TreeType` policy class
+
+The `RangeSearch` class also allows a custom tree to be used.  The `TreeType`
+policy is also used elsewhere in mlpack and is documented more thoroughly
+[here](../developer/trees.md).
+
+Typical choices might include `KDTree` (the default), `BallTree`, `RTree`,
+`RStarTree`, or `StandardCoverTree`.  Below is an example that uses the
+`RangeSearch` class with an R-tree:
+
+```c++
+// Construct a RangeSearch object that uses an R-tree.
+RangeSearch<EuclideanDistance, arma::mat, RTree> rangeSearch(dataset);
+```
+
+For further information on trees, including how to write your own tree for use
+with `RangeSearch` and other mlpack methods, see the [TreeType policy
+documentation](../developer/trees.md).
+
+## Further documentation
+
+For further documentation on the `RangeSearch` class, consult the documentation
+in the source code, found in `mlpack/methods/range_search/`.
diff -pruN 3.4.2-7/doc/tutorials/README.md 4.0.1-1/doc/tutorials/README.md
--- 3.4.2-7/doc/tutorials/README.md	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/README.md	2022-12-29 15:40:18.000000000 +0000
@@ -1,40 +1,18 @@
-
 ## Tutorials
 
-Tutorials for mlpack can be found [here : mlpack tutorials](https://www.mlpack.org/doc/mlpack-git/doxygen/tutorials.html).
-
-
-### General mlpack tutorials
-
-These tutorials introduce the basic concepts of working with mlpack, aimed at developers who want to use and contribute to mlpack but are not sure where to start.
-
-* [Building mlpack from source](https://www.mlpack.org/doc/mlpack-git/doxygen/build.html)
-* [File Formats in mlpack](https://www.mlpack.org/doc/mlpack-git/doxygen/formatdoc.html)
-* [Matrices in mlpack](https://www.mlpack.org/doc/mlpack-git/doxygen/matrices.html)
-* [mlpack input and output](https://www.mlpack.org/doc/mlpack-git/doxygen/iodoc.html)
-* [mlpack timers](https://www.mlpack.org/doc/mlpack-git/doxygen/timer.html)
-* [Simple sample mlpack programs](https://www.mlpack.org/doc/mlpack-git/doxygen/sample.html)
-
-
-### Method-specific tutorials
-
-These tutorials introduce the various methods mlpack offers, aimed at users who want to get started quickly. These tutorials start with simple examples and progress to complex, extensible uses.
-
-* [NeighborSearch tutorial (mlpack_knn / mlpack_kfn)](https://www.mlpack.org/doc/mlpack-git/doxygen/nstutorial.html)
-* [LinearRegression tutorial (mlpack_linear_regression)](https://www.mlpack.org/doc/mlpack-git/doxygen/lrtutorial.html)
-* [RangeSearch tutorial (mlpack_range_search)](https://www.mlpack.org/doc/mlpack-git/doxygen/rstutorial.html)
-* [Density Estimation Trees tutorial (mlpack_det)](https://www.mlpack.org/doc/mlpack-git/doxygen/dettutorial.html)
-* [K-Means tutorial (mlpack_kmeans)](https://www.mlpack.org/doc/mlpack-git/doxygen/kmtutorial.html)
-* [FastMKS tutorial (mlpack_fastmks)](https://www.mlpack.org/doc/mlpack-git/doxygen/fmkstutorial.html)
-* [Euclidean Minimum Spanning Trees tutorial (mlpack_emst)](https://www.mlpack.org/doc/mlpack-git/doxygen/emst_tutorial.html)
-* [Alternating Matrix Factorization Tutorial](https://www.mlpack.org/doc/mlpack-git/doxygen/amftutorial.html)
-* [Collaborative Filtering Tutorial](https://www.mlpack.org/doc/mlpack-git/doxygen/cftutorial.html)
-
-
-### Policy Class Documentation
-
-mlpack uses templates to achieve its genericity and flexibility. Some of the template types used by mlpack are common across multiple machine learning algorithms. The links below provide documentation for some of these common types.
+Tutorials for mlpack can be found in this directory.
 
-* [The MetricType policy in mlpack](https://www.mlpack.org/doc/mlpack-git/doxygen/metrics.html)
-* [The KernelType policy in mlpack](https://www.mlpack.org/doc/mlpack-git/doxygen/kernels.html)
-* [The TreeType policy in mlpack](https://www.mlpack.org/doc/mlpack-git/doxygen/trees.html)
+ - [Alternating Matrix Factorization (AMF)](amf.md)
+ - [Artificial Neural Networks (ANN)](ann.md)
+ - [Approximate k-Furthest Neighbor Search (`approx_kfn`)](approx_kfn.md)
+ - [Collaborative Filtering (CF)](cf.md)
+ - [DatasetMapper](datasetmapper.md)
+ - [Density Estimation Trees (DET)](det.md)
+ - [Euclidean Minimum Spanning Trees (EMST)](emst.md)
+ - [Fast Max-Kernel Search (FastMKS)](fastmks.md)
+ - [Image Utilities](image.md)
+ - [k-Means Clustering](kmeans.md)
+ - [Linear Regression](linear_regression.md)
+ - [Neighbor Search (k-Nearest-Neighbors)](neighbor_search.md)
+ - [Range Search](range_search.md)
+ - [Reinforcement Learning](reinforcement_learning.md)
diff -pruN 3.4.2-7/doc/tutorials/reinforcement_learning/reinforcement_learning.txt 4.0.1-1/doc/tutorials/reinforcement_learning/reinforcement_learning.txt
--- 3.4.2-7/doc/tutorials/reinforcement_learning/reinforcement_learning.txt	2020-10-28 16:10:49.000000000 +0000
+++ 4.0.1-1/doc/tutorials/reinforcement_learning/reinforcement_learning.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,407 +0,0 @@
-/*!
-@file reinforcement_learning.txt
-@author Sriram S K
-@author Joel Joseph
-@brief Tutorial for how to use the Reinforcement Learning module in mlpack.
-
-@page rltutorial Reinforcement Learning Tutorial
-
-@section intro_rltut Introduction
-
-Reinforcement Learning is one of the hottest topics right now, with
-interest surging after DeepMind published their article on training
-deep neural networks to play Atari games to great success. mlpack
-implements a complete end-to-end framework for Reinforcement Learning,
-featuring multiple environments, policies and methods. Of course,
-custom environments and policies can be used and plugged into the
-existing framework with no runtime overhead.
-
-mlpack implements typical benchmark environments (Acrobot, Mountain car etc.),
-commonly used policies, replay methods and supports asynchronous
-learning as well. In addition, it can [communicate](https://github.com/zoq/gym_tcp_api)
-with the OpenAI Gym toolkit for more environments.
-
-@section toc_rltut Table of Contents
-
-This tutorial is split into the following sections:
-
- - \ref intro_rltut
- - \ref toc_rltut
- - \ref environment_rltut
- - \ref agent_components_rltut
- - \ref q_learning_rltut
- - \ref async_learning_rltut
- - \ref further_rltut
-
-@section environment_rltut Reinforcement Learning Environments
-
-mlpack implements a number of the most popular environments used for testing
-RL agents and algorithms. These include the Cart Pole, Acrobot, Mountain Car
-and their variations. Of course, as mentioned above, you can communicate with
-OpenAI Gym for other environments, like the Atari video games.
-
-A key component of mlpack is its extensibility. It is a simple process to create
-your own custom environments, specific to your needs, and use it with mlpack's
-RL framework. All the environments implement a few specific methods and classes
-which are used by the agents while learning.
-
-- \c State: The State class is a representation of the environment. For the CartPole,
-   this would involve storing the position, velocity, angle and angular velocity.
-
-- \c Action: For discrete environments, Action is a class with an enum naming all the possible
-   actions the agent can take in the environment. Continuing with the CartPole example, the enum
-   would simply contain the two possible actions, `backward` and `forward`. For continuous environments,
-   the Action class contains an array with its size depending on the action space.
-
-- \c Sample: This method is perhaps the heart of the environment, providing rewards to
-   the agent depending on the state and the action taken, and updates the state based on
-   the action taken as well.
-
-Of course, your custom environment will most likely make use of a number of helper methods, depending
-on your application, such as the \c Dsdt method in the \c Acrobot environment, used in the \c RK4
-iterative method (also another helper method) to estimate the next state.
-
-@section agent_components_rltut Components of an RL Agent
-
-A Reinforcement Learning agent, in general, takes actions in an environment in order
-to maximize a cumulative reward. To that end, it requires a way to choose actions (\b policy)
-and a way to sample previous experiences (\b replay).
-
-An example of a simple policy would be an epsilon-greedy policy. Using such a policy, the agent
-will choose actions greedily with some probability epsilon. This probability is slowly decreased
-over time, balancing the line between exploration and exploitation.
-
-Similarly, an example of a simple replay would be a random replay. At each time step, the
-interactions between the agent and the environment are saved to a memory buffer and previous
-experiences are sampled from the buffer to train the agent.
-
-Instantiating the components of an agent can be easily done by passing the Environment as
-a templated argument and the parameters of the policy/replay to the constructor.
-
-To create a Greedy Policy and Prioritized Replay for the CartPole environment, we would do the
-following:
-
-@code
-GreedyPolicy<CartPole> policy(1.0, 1000, 0.1);
-PrioritizedReplay<CartPole> replayMethod(10, 10000, 0.6);
-@endcode
-
-The arguments to `policy` are the initial epsilon values, the interval of decrease in its value
-and the value at which epsilon bottoms out and won't be reduced further. The arguments to
-`replayMethod` are size of the batch returned, the number of examples stored in memory, and the
-degree of prioritization.
-
-In addition to the above components, an RL agent requires many hyperparameters to be tuned during
- it's training period. These parameters include everything from the discount rate of the future
-reward to whether Double Q-learning should be used or not. The `TrainingConfig` class can be
-instantiated and configured as follows:
-
-@code
-  TrainingConfig config;
-  config.StepSize() = 0.01;
-  config.Discount() = 0.9;
-  config.TargetNetworkSyncInterval() = 100;
-  config.ExplorationSteps() = 100;
-  config.DoubleQLearning() = false;
-  config.StepLimit() = 200;
-@endcode
-
-The object `config` describes an RL agent, using a step size of 0.01 for the optimization process,
-a discount factor of 0.9, sync interval of 200 episodes. This agent only starts learning after storing
-100 exploration steps, has a step limit of 200, and does not utilize double q-learning.
-
-In this way, we can easily configure an RL agent with the desired hyperparameters.
-
-@section q_learning_rltut Q-Learning in mlpack
-
-Here, we demonstrate Q-Learning in mlpack through the use of a simple example, the training of a Q-Learning
-agent on the CartPole environment. The code has been broken into chunks for easy understanding.
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/ffn.hpp>
-#include <mlpack/methods/ann/init_rules/gaussian_init.hpp>
-#include <mlpack/methods/ann/layer/layer.hpp>
-#include <mlpack/methods/ann/loss_functions/mean_squared_error.hpp>
-#include <mlpack/methods/reinforcement_learning/q_learning.hpp>
-#include <mlpack/methods/reinforcement_learning/q_networks/simple_dqn.hpp>
-#include <mlpack/methods/reinforcement_learning/environment/cart_pole.hpp>
-#include <mlpack/methods/reinforcement_learning/policy/greedy_policy.hpp>
-#include <mlpack/methods/reinforcement_learning/training_config.hpp>
-#include <ensmallen.hpp>
-
-using namespace mlpack;
-using namespace mlpack::ann;
-using namespace ens;
-using namespace mlpack::rl;
-@endcode
-
-We include all the necessary components of our toy example and declare namespaces for convenience.
-
-@code
-int main()
-{
-  // Set up the network.
-  SimpleDQN<> model(4, 64, 32, 2);
-@endcode
-
-The first step in setting our Q-learning agent is to setup the network for it to use. SimpleDQN class creates a
-simple feed forward network with 2 hidden layers. The network constructed here has an input shape of 4 and
-output shape of 2. This corresponds to the structure of the CartPole environment, where each state is
-represented as a column vector with 4 data members (position, velocity, angle, angular velocity). Similarly,
-the output shape is represented by the number of possible actions, which in this case, is only 2
-(`foward` and `backward`).
-
-We can also use mlpack's ann module to setup a custom FFN network. For example, here we use a single
-hidden layer.
-
-@code
-int main()
-{
-  // Set up the network.
-  FFN<MeanSquaredError<>, GaussianInitialization> model(MeanSquaredError<>(),
-      GaussianInitialization(0, 0.001));
-  model.Add<Linear<>>(4, 128);
-  model.Add<ReLULayer<>>();
-  model.Add<Linear<>>(128, 128);
-  model.Add<ReLULayer<>>();
-  model.Add<Linear<>>(128, 2);
-
-@endcode
-
-The next step would be to setup the other components of the Q-learning agent, namely its policy, replay
-method and hyperparameters.
-
-@code
- // Set up the policy and replay method.
-  GreedyPolicy<CartPole> policy(1.0, 1000, 0.1, 0.99);
-  RandomReplay<CartPole> replayMethod(10, 10000);
-
-  TrainingConfig config;
-  config.StepSize() = 0.01;
-  config.Discount() = 0.9;
-  config.TargetNetworkSyncInterval() = 100;
-  config.ExplorationSteps() = 100;
-  config.DoubleQLearning() = false;
-  config.StepLimit() = 200;
-@endcode
-
-And now, we get to the heart of the program, declaring a Q-Learning agent.
-
-@code
-  QLearning<CartPole, decltype(model), AdamUpdate, decltype(policy)>
-      agent(config, model, policy, replayMethod);
-@endcode
-
-Here, we call the `QLearning` constructor, passing in the type of environment,
-network, updater, policy and replay. We use `decltype(var)` as a shorthand for
-the variable, saving us the trouble of copying the lengthy templated type.
-
-We pass references of the objects we created, as parameters to QLearning class.
-
-Now, we have our Q-Learning agent `agent` ready to be trained on the Cart Pole environment.
-
-@code
-  arma::running_stat<double> averageReturn;
-  size_t episodes = 0;
-  bool converged = true;
-  while (true)
-  {
-    double episodeReturn = agent.Episode();
-    averageReturn(episodeReturn);
-    episodes += 1;
-
-    if (episodes > 1000)
-    {
-      std::cout << "Cart Pole with DQN failed." << std::endl;
-      converged = false;
-      break;
-    }
-
-    /**
-     * Reaching running average return 35 is enough to show it works.
-     */
-    std::cout << "Average return: " << averageReturn.mean()
-        << " Episode return: " << episodeReturn << std::endl;
-    if (averageReturn.mean() > 35)
-      break;
-  }
-  if (converged)
-    std::cout << "Hooray! Q-Learning agent successfully trained" << std::endl;
-
-  return 0;
-}
-@endcode
-
-We set up a loop to train the agent. The exit condition is determined by the average
-reward which can be computed with `arma::running_stat`. It is used for storing running
-statistics of scalars, which in this case is the reward signal. The agent can be said
-to have converged when the average return reaches a predetermined value (i.e. > 35).
-
-Conversely, if the average return does not go beyond that amount even after a thousand
-episodes, we can conclude that the agent will not converge and exit the training loop.
-
-@section async_learning_rltut
-
-In 2016, Researchers at Deepmind and University of Montreal published their paper
-"Asynchronous Methods for Deep Reinforcement Learning". In it they described asynchronous
-variants of four standard reinforcement learning algorithms:
-	- One-Step SARSA
-	- One-Step Q-Learning
-	- N-Step Q-Learning
-	- Advantage Actor-Critic(A3C)
-
-Online RL algorithms and Deep Neural Networks make an unstable combination because of the
-non-stationary and correlated nature of online updates. Although this is solved by Experience Replay,
-it has several drawbacks: it uses more memory and computation per real interaction; and it requires
-off-policy learning algorithms.
-
-Asynchronous methods, instead of experience replay, asynchronously executes multiple agents
-in parallel, on multiple instances of the environment, which solves all the above problems.
-
-Here, we demonstrate asynchronous learning methods in mlpack through the
-training of an async agent. Asynchronous learning involves training several
-agents simultaneously, where each agent is referred to as a "worker".
-Currently, mlpack has One-Step Q-Learning, N-Step Q-Learning, and One-Step
-SARSA workers.
-
-Let's examine the sample code in chunks.
-
-Apart from the includes used for the Q-Learning example, two more have to be included:
-
-@code
-#include <mlpack/methods/reinforcement_learning/async_learning.hpp>
-#include <mlpack/methods/reinforcement_learning/policy/aggregated_policy.hpp>
-@endcode
-
-Here we don't use experience replay, and instead of a single policy, we use
-three different policies, one for each worker. The number of workers created
-depends on the number of policies given to the `AggregatedPolicy`. The column
-vector contains the probability of selecting each child policy; we should make
-sure its size is the same as the number of policies and that its elements sum
-to 1.
-
-@code
-AggregatedPolicy<GreedyPolicy<CartPole>> policy({GreedyPolicy<CartPole>(0.7, 5000, 0.1),
-                                                 GreedyPolicy<CartPole>(0.7, 5000, 0.01),
-                                                 GreedyPolicy<CartPole>(0.7, 5000, 0.5)},
-                                                 arma::colvec("0.4 0.3 0.3"));
-@endcode
-
-Now, we will create the `OneStepQLearning` agent. We could have used
-`NStepQLearning` or `OneStepSarsa` here instead, depending on our requirements.
-
-@code
-OneStepQLearning<CartPole, decltype(model), ens::AdamUpdate, decltype(policy)>
-    agent(std::move(config), std::move(model), std::move(policy));
-@endcode
-
-Here, unlike the Q-Learning example, instead of writing the training loop
-ourselves, we call the `Train()` method of the asynchronous learning class
-inside a for loop. 100 training episodes will take around 50 seconds.
-
-@code
-for (int i = 0; i < 100; i++)
-{
-  agent.Train(measure);
-}
-@endcode
-
-What is `measure` here? It is a lambda function which returns a boolean value
-(indicating whether training should end) and accepts the episode return (the
-total reward of a deterministic test episode) as a parameter. So, let's create
-that.
-
-@code
-arma::vec returns(20, arma::fill::zeros);
-size_t position = 0;
-size_t episode = 0;
-
-auto measure = [&returns, &position, &episode](double episodeReturn)
-{
-  if (episode > 10000)
-    return true;
-
-  returns[position++] = episodeReturn;
-  position = position % returns.n_elem;
-  episode++;
-
-  std::cout << "Episode No.: " << episode
-      << "; Episode Return: " << episodeReturn
-      << "; Average Return: " << arma::mean(returns) << std::endl;
-
-  // Keep training until the episode limit is reached.
-  return false;
-};
-@endcode
-
-This will train three different agents on three CPU threads asynchronously and
-use this data to update the action value estimate.
-
-Voila, that's all there is to it.
-
-Here is the full code to try this right away:
-
-@code
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/ffn.hpp>
-#include <mlpack/methods/ann/init_rules/gaussian_init.hpp>
-#include <mlpack/methods/ann/layer/layer.hpp>
-#include <mlpack/methods/ann/loss_functions/mean_squared_error.hpp>
-#include <mlpack/methods/reinforcement_learning/async_learning.hpp>
-#include <mlpack/methods/reinforcement_learning/environment/cart_pole.hpp>
-#include <mlpack/methods/reinforcement_learning/policy/greedy_policy.hpp>
-#include <mlpack/methods/reinforcement_learning/policy/aggregated_policy.hpp>
-#include <mlpack/methods/reinforcement_learning/training_config.hpp>
-#include <ensmallen.hpp>
-
-using namespace mlpack;
-using namespace mlpack::ann;
-using namespace mlpack::rl;
-int main()
-{
-  // Set up the network.
-  FFN<MeanSquaredError<>, GaussianInitialization> model(MeanSquaredError<>(),
-      GaussianInitialization(0, 0.001));
-  model.Add<Linear<>>(4, 128);
-  model.Add<ReLULayer<>>();
-  model.Add<Linear<>>(128, 128);
-  model.Add<ReLULayer<>>();
-  model.Add<Linear<>>(128, 2);
-
-  AggregatedPolicy<GreedyPolicy<CartPole>> policy({GreedyPolicy<CartPole>(0.7, 5000, 0.1),
-                                                   GreedyPolicy<CartPole>(0.7, 5000, 0.01),
-                                                   GreedyPolicy<CartPole>(0.7, 5000, 0.5)},
-                                                   arma::colvec("0.4 0.3 0.3"));
-
-  TrainingConfig config;
-  config.StepSize() = 0.01;
-  config.Discount() = 0.9;
-  config.TargetNetworkSyncInterval() = 100;
-  config.ExplorationSteps() = 100;
-  config.DoubleQLearning() = false;
-  config.StepLimit() = 200;
-
-  OneStepQLearning<CartPole, decltype(model), ens::AdamUpdate, decltype(policy)>
-      agent(std::move(config), std::move(model), std::move(policy));
-
-  arma::vec returns(20, arma::fill::zeros);
-  size_t position = 0;
-  size_t episode = 0;
-
-  auto measure = [&returns, &position, &episode](double episodeReturn)
-  {
-    if (episode > 10000)
-      return true;
-
-    returns[position++] = episodeReturn;
-    position = position % returns.n_elem;
-    episode++;
-
-    std::cout << "Episode No.: " << episode
-        << "; Episode Return: " << episodeReturn
-        << "; Average Return: " << arma::mean(returns) << std::endl;
-
-    // Keep training until the episode limit is reached.
-    return false;
-  };
-
-  for (int i = 0; i < 100; i++)
-  {
-    agent.Train(measure);
-  }
-}
-@endcode
-
-@section further_rltut Further documentation
-
-For further documentation on the rl classes, consult the \ref mlpack::rl
-"complete API documentation".
-
-*/
diff -pruN 3.4.2-7/doc/tutorials/reinforcement_learning.md 4.0.1-1/doc/tutorials/reinforcement_learning.md
--- 3.4.2-7/doc/tutorials/reinforcement_learning.md	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/reinforcement_learning.md	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,387 @@
+# Reinforcement Learning Tutorial
+
+Reinforcement Learning is one of the hottest topics right now, with interest
+surging after DeepMind published their article on training deep neural networks
+to play Atari games with great success. mlpack implements a complete end-to-end
+framework for Reinforcement Learning, featuring multiple environments, policies
+and methods. Of course, custom environments and policies can be used and plugged
+into the existing framework with no runtime overhead.
+
+mlpack implements typical benchmark environments (Acrobot, Mountain Car, etc.),
+commonly used policies, replay methods and supports asynchronous learning as
+well. In addition, it can [communicate](https://github.com/zoq/gym_tcp_api) with
+the OpenAI Gym toolkit for more environments.
+
+## Reinforcement Learning Environments
+
+mlpack implements a number of the most popular environments used for testing RL
+agents and algorithms. These include the Cart Pole, Acrobot, Mountain Car and
+their variations. Of course, as mentioned above, you can communicate with OpenAI
+Gym for other environments, like the Atari video games.
+
+A key component of mlpack is its extensibility. It is a simple process to
+create your own custom environments, specific to your needs, and use them with
+mlpack's RL framework. All the environments implement a few specific methods
+and classes which are used by the agents while learning.
+
+- `State`: The `State` class is a representation of the environment. For the
+  `CartPole`, this would involve storing the position, velocity, angle, and
+  angular velocity.
+
+- `Action`: For discrete environments, `Action` is a class with an enum naming
+  all the possible actions the agent can take in the environment. Continuing
+  with the `CartPole` example, the enum would simply contain the two possible
+  actions, `backward` and `forward`. For continuous environments, the `Action`
+  class contains an array with its size depending on the action space.
+
+- `Sample`: This method is perhaps the heart of the environment; it provides a
+  reward to the agent depending on the current state and the action taken, and
+  it computes the next state that results from that action.
+
+Of course, your custom environment will most likely make use of a number of
+helper methods, depending on your application, such as the `Dsdt` method in the
+`Acrobot` environment, used in the `RK4` iterative method (another helper
+method) to estimate the next state.
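+
+To make this concrete, below is a minimal sketch of what a custom environment
+could look like. This is an illustrative outline only, not part of mlpack: the
+`State`, `Action`, and `Sample` members follow the description above, and a
+real environment would also need the rest of the interface that environments
+such as `CartPole` provide (for example, `InitialSample()` and `IsTerminal()`).
+
+```c++
+// A hypothetical one-dimensional "reach the origin" environment.
+class MyEnvironment
+{
+ public:
+  // A State wraps a column vector describing the environment.
+  class State
+  {
+   public:
+    State() : data(1, arma::fill::zeros) { }
+    arma::colvec& Data() { return data; }
+    const arma::colvec& Data() const { return data; }
+   private:
+    arma::colvec data;
+  };
+
+  // An Action enumerates the discrete choices available to the agent.
+  class Action
+  {
+   public:
+    enum actions { left, right };
+    Action::actions action;
+  };
+
+  // Sample() returns the reward for taking `action` in `state`, and fills
+  // `nextState` with the state that results from taking that action.
+  double Sample(const State& state, const Action& action, State& nextState)
+  {
+    const double step = (action.action == Action::right) ? 1.0 : -1.0;
+    nextState.Data() = state.Data() + step;
+    // Reward the agent whenever it sits at the origin.
+    return (nextState.Data()[0] == 0.0) ? 1.0 : 0.0;
+  }
+};
+```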
+
+## Components of an RL Agent
+
+A Reinforcement Learning agent, in general, takes actions in an environment in
+order to maximize a cumulative reward. To that end, it requires a way to choose
+actions (*policy*) and a way to sample previous experiences (*replay*).
+
+An example of a simple policy would be an epsilon-greedy policy. Using such a
+policy, the agent chooses a random action with probability epsilon, and the
+greedy (highest-valued) action otherwise. Epsilon is slowly decreased over
+time, shifting the balance from exploration towards exploitation.
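+
+As a sketch of the idea (illustrative only; mlpack's `GreedyPolicy`, used later
+in this tutorial, implements this logic together with the annealing of epsilon
+for us), action selection could look like this:
+
+```c++
+#include <random>
+
+// Hypothetical helper: pick an action index given the estimated value of each
+// action and the current exploration probability epsilon.
+size_t EpsilonGreedy(const arma::vec& actionValues, const double epsilon)
+{
+  static std::mt19937 rng(std::random_device{}());
+
+  // Explore: with probability epsilon, choose an action uniformly at random.
+  std::uniform_real_distribution<double> coin(0.0, 1.0);
+  if (coin(rng) < epsilon)
+  {
+    std::uniform_int_distribution<size_t> pick(0, actionValues.n_elem - 1);
+    return pick(rng);
+  }
+
+  // Exploit: otherwise, choose the action with the highest estimated value.
+  return actionValues.index_max();
+}
+```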
+
+Similarly, an example of a simple replay would be a random replay. At each time
+step, the interactions between the agent and the environment are saved to a
+memory buffer and previous experiences are sampled from the buffer to train the
+agent.
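+
+The idea behind such a buffer can be sketched as follows (again illustrative
+only; mlpack's replay classes, such as `RandomReplay` below, handle storage and
+the sampling of whole batches for us):
+
+```c++
+#include <random>
+#include <vector>
+
+// Hypothetical, simplified replay buffer: it stores experiences in a
+// fixed-size ring buffer and samples one uniformly at random.
+template<typename ExperienceType>
+class SimpleReplayBuffer
+{
+ public:
+  SimpleReplayBuffer(const size_t capacity) :
+      buffer(capacity), position(0), full(false) { }
+
+  void Store(const ExperienceType& experience)
+  {
+    // Once the buffer is full, the oldest experiences are overwritten first.
+    buffer[position++] = experience;
+    if (position == buffer.size())
+    {
+      position = 0;
+      full = true;
+    }
+  }
+
+  // Assumes at least one experience has been stored.
+  const ExperienceType& Sample()
+  {
+    static std::mt19937 rng(std::random_device{}());
+    const size_t stored = full ? buffer.size() : position;
+    std::uniform_int_distribution<size_t> pick(0, stored - 1);
+    return buffer[pick(rng)];
+  }
+
+ private:
+  std::vector<ExperienceType> buffer;
+  size_t position;
+  bool full;
+};
+```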
+
+Instantiating the components of an agent can be easily done by passing the
+environment as a template argument and the parameters of the policy/replay to
+the constructor.
+
+To create a Greedy Policy and Prioritized Replay for the `CartPole` environment,
+we would do the following:
+
+```c++
+// 1.0: initial epsilon; 1000: annealing interval; 0.1: minimum epsilon.
+GreedyPolicy<CartPole> policy(1.0, 1000, 0.1);
+// 10: batch size; 10000: buffer capacity; 0.6: degree of prioritization.
+PrioritizedReplay<CartPole> replayMethod(10, 10000, 0.6);
+```
+
+The arguments to `policy` are the initial value of epsilon, the interval over
+which it is annealed, and the value at which epsilon bottoms out and won't be
+reduced further. The arguments to `replayMethod` are the size of the batch
+returned, the number of examples stored in memory, and the degree of
+prioritization.
+
+In addition to the above components, an RL agent requires many hyperparameters
+to be tuned during its training period. These parameters include everything
+from the discount factor of the future reward to whether double Q-learning
+should be used or not. The `TrainingConfig` class can be instantiated and
+configured as follows:
+
+```c++
+TrainingConfig config;
+config.StepSize() = 0.01;
+config.Discount() = 0.9;
+config.TargetNetworkSyncInterval() = 100;
+config.ExplorationSteps() = 100;
+config.DoubleQLearning() = false;
+config.StepLimit() = 200;
+```
+
+The object `config` describes an RL agent, using a step size of 0.01 for the
+optimization process, a discount factor of 0.9, and a target network sync
+interval of 100. This agent only starts learning after storing 100 exploration
+steps, has a step limit of 200, and does not utilize double Q-learning.
+
+In this way, we can easily configure an RL agent with the desired
+hyperparameters.
+
+## Q-Learning in mlpack
+
+Here, we demonstrate Q-Learning in mlpack through the use of a simple example,
+the training of a Q-Learning agent on the `CartPole` environment. The code has
+been broken into chunks for easy understanding.
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+using namespace ens;
+using namespace mlpack::rl;
+```
+
+We include all the necessary components of our toy example and declare
+namespaces for convenience.
+
+```c++
+int main()
+{
+  // Set up the network.
+  SimpleDQN<> model(4, 64, 32, 2);
+```
+
+The first step in setting up our Q-Learning agent is to set up the network for
+it to use. The `SimpleDQN` class creates a simple feed-forward network with 2
+hidden layers. The network constructed here has an input shape of 4 and an
+output shape of 2. This corresponds to the structure of the `CartPole`
+environment, where each state is represented as a column vector with 4 data
+members (position, velocity, angle, angular velocity). Similarly, the output
+shape is given by the number of possible actions, which in this case is only 2
+(`forward` and `backward`).
+
+We can also use mlpack's ann module to set up a custom `FFN` network. For
+example, here we build a network with two hidden layers by hand. However, the
+Q-Learning agent expects the network object to have a `ResetNoise` method,
+which `SimpleDQN` has, so we can't pass mlpack's `FFN` network directly;
+instead, we have to wrap it in a `SimpleDQN` object.
+
+```c++
+int main()
+{
+  // Set up the network.
+  FFN<MeanSquaredError<>, GaussianInitialization> network(MeanSquaredError<>(),
+      GaussianInitialization(0, 0.001));
+  network.Add<Linear<>>(4, 128);
+  network.Add<ReLULayer<>>();
+  network.Add<Linear<>>(128, 128);
+  network.Add<ReLULayer<>>();
+  network.Add<Linear<>>(128, 2);
+
+  SimpleDQN<> model(network);
+```
+
+The next step would be to set up the other components of the Q-learning agent,
+namely its policy, replay method, and hyperparameters.
+
+```c++
+  // Set up the policy and replay method.
+  GreedyPolicy<CartPole> policy(1.0, 1000, 0.1, 0.99);
+  RandomReplay<CartPole> replayMethod(10, 10000);
+
+  TrainingConfig config;
+  config.StepSize() = 0.01;
+  config.Discount() = 0.9;
+  config.TargetNetworkSyncInterval() = 100;
+  config.ExplorationSteps() = 100;
+  config.DoubleQLearning() = false;
+  config.StepLimit() = 200;
+```
+
+And now we get to the heart of the program: declaring a Q-Learning agent.
+
+```c++
+  QLearning<CartPole, decltype(model), AdamUpdate, decltype(policy)>
+      agent(config, model, policy, replayMethod);
+```
+
+Here, we call the `QLearning` constructor, passing in the types of the
+environment, network, updater, policy, and replay. We use `decltype(var)` as a
+shorthand for the variable's type, saving us the trouble of writing out the
+lengthy templated type.
+
+We pass references to the objects we created as parameters to the `QLearning`
+class.
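+
+For instance, with the declarations above, `decltype(model)` is just
+`SimpleDQN<>` and `decltype(policy)` is `GreedyPolicy<CartPole>`, so the
+declaration is equivalent to writing out:
+
+```c++
+QLearning<CartPole, SimpleDQN<>, AdamUpdate, GreedyPolicy<CartPole>>
+    agent(config, model, policy, replayMethod);
+```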
+
+Now, we have our Q-Learning agent `agent` ready to be trained on the Cart Pole
+environment.
+
+```c++
+  arma::running_stat<double> averageReturn;
+  size_t episodes = 0;
+  bool converged = true;
+  while (true)
+  {
+    double episodeReturn = agent.Episode();
+    averageReturn(episodeReturn);
+    episodes += 1;
+
+    if (episodes > 1000)
+    {
+      std::cout << "Cart Pole with DQN failed." << std::endl;
+      converged = false;
+      break;
+    }
+
+    /**
+     * Reaching a running average return of 35 is enough to show it works.
+     */
+    std::cout << "Average return: " << averageReturn.mean()
+        << " Episode return: " << episodeReturn << std::endl;
+    if (averageReturn.mean() > 35)
+      break;
+  }
+  if (converged)
+    std::cout << "Hooray! Q-Learning agent successfully trained" << std::endl;
+
+  return 0;
+}
+```
+
+We set up a loop to train the agent. The exit condition is determined by the
+average return, which can be computed with `arma::running_stat`; it stores
+running statistics of scalars, in this case the reward signal. The agent can be
+said to have converged when the average return reaches a predetermined value
+(here, greater than 35).
+
+Conversely, if the average return does not go beyond that amount even after a
+thousand episodes, we conclude that the agent will not converge, and we exit
+the training loop.
+
+## Asynchronous Learning
+
+In 2016, researchers at DeepMind and the University of Montreal published their
+paper "Asynchronous Methods for Deep Reinforcement Learning". In it, they
+described asynchronous variants of four standard reinforcement learning
+algorithms:
+
+ - One-Step SARSA
+ - One-Step Q-Learning
+ - N-Step Q-Learning
+ - Advantage Actor-Critic (A3C)
+
+Online RL algorithms and deep neural networks make an unstable combination
+because of the non-stationary and correlated nature of online updates. Although
+this can be solved by experience replay, experience replay has several
+drawbacks: it uses more memory and computation per real interaction, and it
+requires off-policy learning algorithms.
+
+Asynchronous methods, instead of using experience replay, execute multiple
+agents asynchronously, in parallel, on multiple instances of the environment,
+which addresses all of the above problems.
+
+Here, we demonstrate asynchronous learning methods in mlpack through the
+training of an async agent. Asynchronous learning involves training several
+agents simultaneously, where each agent is referred to as a "worker".
+Currently, mlpack has One-Step Q-Learning, N-Step Q-Learning, and One-Step
+SARSA workers.
+
+Let's examine the sample code in chunks.
+
+Here we don't use experience replay, and instead of a single policy, we use
+three different policies, one for each worker. The number of workers created
+depends on the number of policies given to the `AggregatedPolicy`. The column
+vector contains the probability of selecting each child policy; we should make
+sure its size is the same as the number of policies and that its elements sum
+to 1.
+
+```c++
+AggregatedPolicy<GreedyPolicy<CartPole>> policy({GreedyPolicy<CartPole>(0.7, 5000, 0.1),
+                                                 GreedyPolicy<CartPole>(0.7, 5000, 0.01),
+                                                 GreedyPolicy<CartPole>(0.7, 5000, 0.5)},
+                                                 arma::colvec("0.4 0.3 0.3"));
+```
+
+Now, we will create the `OneStepQLearning` agent. We could have used
+`NStepQLearning` or `OneStepSarsa` here instead, depending on our requirements.
+
+```c++
+OneStepQLearning<CartPole, decltype(model), ens::AdamUpdate, decltype(policy)>
+    agent(std::move(config), std::move(model), std::move(policy));
+```
+
+Here, unlike the Q-Learning example, instead of writing the training loop
+ourselves, we call the `Train()` method of the asynchronous learning class
+inside a for loop. 100 training episodes will take around 50 seconds.
+
+```c++
+for (int i = 0; i < 100; i++)
+{
+  agent.Train(measure);
+}
+```
+
+What is `measure` here? It is a lambda function which returns a boolean value
+(indicating whether training should end) and accepts the episode return (the
+total reward of a deterministic test episode) as a parameter. So, let's create
+that.
+
+```c++
+arma::vec returns(20, arma::fill::zeros);
+size_t position = 0;
+size_t episode = 0;
+
+auto measure = [&returns, &position, &episode](double episodeReturn)
+{
+  if (episode > 10000)
+    return true;
+
+  returns[position++] = episodeReturn;
+  position = position % returns.n_elem;
+  episode++;
+
+  std::cout << "Episode No.: " << episode
+      << "; Episode Return: " << episodeReturn
+      << "; Average Return: " << arma::mean(returns) << std::endl;
+
+  // Keep training until the episode limit is reached.
+  return false;
+};
+```
+
+This will train three different agents on three CPU threads asynchronously and
+use this data to update the action value estimate.
+
+Voila, that's all there is to it.
+
+Here is the full code to try this right away:
+
+```c++
+#include <mlpack.hpp>
+
+using namespace mlpack;
+using namespace mlpack::rl;
+
+int main()
+{
+  // Set up the network.
+  FFN<MeanSquaredError<>, GaussianInitialization> model(MeanSquaredError<>(),
+      GaussianInitialization(0, 0.001));
+  model.Add<Linear<>>(4, 128);
+  model.Add<ReLULayer<>>();
+  model.Add<Linear<>>(128, 128);
+  model.Add<ReLULayer<>>();
+  model.Add<Linear<>>(128, 2);
+
+  AggregatedPolicy<GreedyPolicy<CartPole>> policy({GreedyPolicy<CartPole>(0.7, 5000, 0.1),
+                                                   GreedyPolicy<CartPole>(0.7, 5000, 0.01),
+                                                   GreedyPolicy<CartPole>(0.7, 5000, 0.5)},
+                                                   arma::colvec("0.4 0.3 0.3"));
+
+  TrainingConfig config;
+  config.StepSize() = 0.01;
+  config.Discount() = 0.9;
+  config.TargetNetworkSyncInterval() = 100;
+  config.ExplorationSteps() = 100;
+  config.DoubleQLearning() = false;
+  config.StepLimit() = 200;
+
+  OneStepQLearning<CartPole, decltype(model), ens::AdamUpdate, decltype(policy)>
+      agent(std::move(config), std::move(model), std::move(policy));
+
+  arma::vec returns(20, arma::fill::zeros);
+  size_t position = 0;
+  size_t episode = 0;
+
+  auto measure = [&returns, &position, &episode](double episodeReturn)
+  {
+    if (episode > 10000)
+      return true;
+
+    returns[position++] = episodeReturn;
+    position = position % returns.n_elem;
+    episode++;
+
+    std::cout << "Episode No.: " << episode
+        << "; Episode Return: " << episodeReturn
+        << "; Average Return: " << arma::mean(returns) << std::endl;
+
+    // Keep training until the episode limit is reached.
+    return false;
+  };
+
+  for (int i = 0; i < 100; i++)
+  {
+    agent.Train(measure);
+  }
+}
+```
+
+## Further Documentation
+
+For further documentation on the reinforcement learning classes, consult the
+documentation in the source code, found in
+`mlpack/methods/reinforcement_learning/`.
diff -pruN 3.4.2-7/doc/tutorials/res/ann.gv 4.0.1-1/doc/tutorials/res/ann.gv
--- 3.4.2-7/doc/tutorials/res/ann.gv	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/res/ann.gv	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,61 @@
+digraph G {
+  fontname = "Hilda 10"
+  rankdir=LR
+  splines=line
+  nodesep=.08;
+  ranksep=1;
+  edge [color=black, arrowsize=.5];
+  node [fixedsize=true,label="",style=filled,color=none,fillcolor=gray,shape=circle]
+
+  subgraph cluster_0 {
+    color=none;
+    node [style=filled, color=white, penwidth=15,fillcolor=black shape=circle];
+    l10  l11  l12  l13  l14  l15  ;
+    label = Input;
+  }
+
+  subgraph cluster_1 {
+    color=none;
+    node [style=filled, color=white, penwidth=15,fillcolor=gray shape=circle];
+    l20  l21  l22  l23  l24  l25  l26  l27  ;
+    label = Linear;
+  }
+
+  subgraph cluster_2 {
+    color=none;
+    node [style=filled, color=white, penwidth=15,fillcolor=gray shape=circle];
+    l30  l31  l32  l33  l34  l35  l36  l37  ;
+    label = Linear;
+  }
+
+  subgraph cluster_3 {
+    color=none;
+    node [style=filled, color=white, penwidth=15,fillcolor=black shape=circle];
+    l40  l41  l42  ;
+    label = LogSoftMax;
+  }
+
+  l10 -> l20   l10 -> l21   l10 -> l22   l10 -> l23   l10 -> l24   l10 -> l25
+  l10 -> l26   l10 -> l27   l11 -> l20   l11 -> l21   l11 -> l22   l11 -> l23
+  l11 -> l24   l11 -> l25   l11 -> l26   l11 -> l27   l12 -> l20   l12 -> l21
+  l12 -> l22   l12 -> l23   l12 -> l24   l12 -> l25   l12 -> l26   l12 -> l27
+  l13 -> l20   l13 -> l21   l13 -> l22   l13 -> l23   l13 -> l24   l13 -> l25
+  l13 -> l26   l13 -> l27   l14 -> l20   l14 -> l21   l14 -> l22   l14 -> l23
+  l14 -> l24   l14 -> l25   l14 -> l26   l14 -> l27   l15 -> l20   l15 -> l21
+  l15 -> l22   l15 -> l23   l15 -> l24   l15 -> l25   l15 -> l26   l15 -> l27
+  l20 -> l30   l20 -> l31   l20 -> l32   l20 -> l33   l20 -> l34   l20 -> l35
+  l20 -> l36   l20 -> l37   l21 -> l30   l21 -> l31   l21 -> l32   l21 -> l33
+  l21 -> l34   l21 -> l35   l21 -> l36   l21 -> l37   l22 -> l30   l22 -> l31
+  l22 -> l32   l22 -> l33   l22 -> l34   l22 -> l35   l22 -> l36   l22 -> l37
+  l23 -> l30   l23 -> l31   l23 -> l32   l23 -> l33   l23 -> l34   l23 -> l35
+  l23 -> l36   l23 -> l37   l24 -> l30   l24 -> l31   l24 -> l32   l24 -> l33
+  l24 -> l34   l24 -> l35   l24 -> l36   l24 -> l37   l25 -> l30   l25 -> l31
+  l25 -> l32   l25 -> l33   l25 -> l34   l25 -> l35   l25 -> l36   l25 -> l37
+  l26 -> l30   l26 -> l31   l26 -> l32   l26 -> l33   l26 -> l34   l26 -> l35
+  l26 -> l36   l26 -> l37   l27 -> l30   l27 -> l31   l27 -> l32   l27 -> l33
+  l27 -> l34   l27 -> l35   l27 -> l36   l27 -> l37   l30 -> l40   l30 -> l41
+  l30 -> l42   l31 -> l40   l31 -> l41   l31 -> l42   l32 -> l40   l32 -> l41
+  l32 -> l42   l33 -> l40   l33 -> l41   l33 -> l42   l34 -> l40   l34 -> l41
+  l34 -> l42   l35 -> l40   l35 -> l41   l35 -> l42   l36 -> l40   l36 -> l41
+  l36 -> l42   l37 -> l40   l37 -> l41   l37 -> l42
+}
diff -pruN 3.4.2-7/doc/tutorials/res/ann.svg 4.0.1-1/doc/tutorials/res/ann.svg
--- 3.4.2-7/doc/tutorials/res/ann.svg	1970-01-01 00:00:00.000000000 +0000
+++ 4.0.1-1/doc/tutorials/res/ann.svg	2022-12-29 15:40:18.000000000 +0000
@@ -0,0 +1,974 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.43.0 (0)
+ -->
+<!-- Title: G Pages: 1 -->
+<svg width="452pt" height="393pt"
+ viewBox="0.00 0.00 452.00 393.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3