using Plots, LinearAlgebra, Random, SparseArrays
using Optim
# example of Rosenbrock function
# NOTE: this doesn't work! See the next cell for the fix.
# See, these things are hard to use :)
function happyf(x)
    return (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
end
function sadg!(x::Vector, storage::Vector)
    storage[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
    storage[2] = 200.0 * (x[2] - x[1]^2)
end
soln = optimize(happyf, sadg!, [0.0, 0.0], GradientDescent())
 * Status: failure

 * Candidate solution
    Final objective value:     NaN

 * Found with
    Algorithm:     Gradient Descent

 * Convergence measures
    |x - x'|               = 0.00e+00 ≤ 0.0e+00
    |x - x'|/|x'|          = NaN ≰ 0.0e+00
    |f(x) - f(x')|         = NaN ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = NaN ≰ 0.0e+00
    |g(x)|                 = NaN ≰ 1.0e-08

 * Work counters
    Seconds run:  0  (vs limit Inf)
    Iterations:   0
    f(x) calls:   1
    ∇f(x) calls:  1
# example of Rosenbrock function, done right this time.
# The bug above: Optim.jl calls the gradient as g!(storage, x), with the output
# buffer FIRST. sadg! declared (x, storage), so it read the uninitialized buffer
# as the point and overwrote the actual iterate, which is where the NaNs came from.
function f(x)
    return (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
end
function g!(storage::Vector, x::Vector)
    storage[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
    storage[2] = 200.0 * (x[2] - x[1]^2)
end
soln = optimize(f, g!, [0.0, 0.0], BFGS())
 * Status: success

 * Candidate solution
    Final objective value:     7.645688e-21

 * Found with
    Algorithm:     BFGS

 * Convergence measures
    |x - x'|               = 3.48e-07 ≰ 0.0e+00
    |x - x'|/|x'|          = 3.48e-07 ≰ 0.0e+00
    |f(x) - f(x')|         = 6.91e-14 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 9.03e+06 ≰ 0.0e+00
    |g(x)|                 = 2.32e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:  0  (vs limit Inf)
    Iterations:   16
    f(x) calls:   53
    ∇f(x) calls:  53
soln.minimizer
2-element Vector{Float64}:
 0.9999999999373603
 0.9999999998686199
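# Besides the soln.minimizer field, Optim exposes accessor functions for the
# result object; these are part of Optim's public API (soln here is the BFGS
# result from the cell above):
Optim.minimum(soln)      # final objective value
Optim.minimizer(soln)    # the minimizing point (same as soln.minimizer)
Optim.converged(soln)    # did any convergence criterion trigger?
Optim.iterations(soln)   # number of iterations used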
soln = optimize(f, g!, [0.0, 0.0], GradientDescent())
 * Status: failure (reached maximum number of iterations)

 * Candidate solution
    Final objective value:     4.154719e-03

 * Found with
    Algorithm:     Gradient Descent

 * Convergence measures
    |x - x'|               = 1.82e-04 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.95e-04 ≰ 0.0e+00
    |f(x) - f(x')|         = 8.18e-06 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.97e-03 ≰ 0.0e+00
    |g(x)|                 = 8.21e-02 ≰ 1.0e-08

 * Work counters
    Seconds run:  0  (vs limit Inf)
    Iterations:   1000
    f(x) calls:   2532
    ∇f(x) calls:  2532
# ChatGPT gave us this line to increase the number of iterations...
soln = optimize(f, g!, [0.0, 0.0], GradientDescent(), Optim.Options(iterations=30000))
 * Status: success

 * Candidate solution
    Final objective value:     7.369388e-17

 * Found with
    Algorithm:     Gradient Descent

 * Convergence measures
    |x - x'|               = 2.00e-11 ≰ 0.0e+00
    |x - x'|/|x'|          = 2.00e-11 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.19e-19 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.61e-03 ≰ 0.0e+00
    |g(x)|                 = 9.99e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:  0  (vs limit Inf)
    Iterations:   20458
    f(x) calls:   51177
    ∇f(x) calls:  51177
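# Hand-coding gradients is exactly how we got the NaN above. If you'd rather
# not, Optim can build the gradient for you with forward-mode automatic
# differentiation via its documented autodiff keyword. A minimal sketch
# (soln_ad is just a name we made up):
soln_ad = optimize(f, [0.0, 0.0], BFGS(); autodiff = :forward)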
# now, we do the matrix factorization example
# originally from Poblano example2
function matrix_approx_function(x::Vector, A::Matrix, r::Int)
    # unpack U and V from x
    m, n = size(A)
    U = reshape(x[1:m*r], m, r)
    V = reshape(x[(m*r+1):end], n, r)
    return 0.5*norm(A - U*V')^2
end
function matrix_approx_gradient!(storage::Vector, x::Vector, A::Matrix, r::Int)
    # unpack U and V from x, as above
    m, n = size(A)
    U = reshape(x[1:m*r], m, r)
    V = reshape(x[(m*r+1):end], n, r)
    D = A - U*V'   # residual
    # for f = 0.5*norm(A - U*V')^2, the gradients are
    # df/dU = -D*V and df/dV = -D'*U
    storage[1:(m*r)] = -vec(D*V)
    storage[(m*r+1):end] = -vec(D'*U)
end
matrix_approx_gradient! (generic function with 1 method)
# Ideally, we'd check our gradient here!!! (One such check is sketched below.)
# David foolishly asserts it's correct. We shouldn't trust him.
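# Here is one way to do that check: compare matrix_approx_gradient! against
# central finite differences at a random point. A minimal sketch; Atest,
# rtest, xtest, and the step h are arbitrary test choices, not part of the
# original example. The relative error should be tiny (around 1e-8 or smaller).
Random.seed!(0)
Atest = randn(5, 4)
rtest = 2
xtest = randn(5*rtest + 4*rtest)
gana = similar(xtest)
matrix_approx_gradient!(gana, xtest, Atest, rtest)
h = 1e-6
gfd = map(1:length(xtest)) do i
    e = zeros(length(xtest)); e[i] = h
    (matrix_approx_function(xtest + e, Atest, rtest) -
     matrix_approx_function(xtest - e, Atest, rtest)) / (2h)
end
@show norm(gana - gfd) / norm(gfd)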
using Random
Random.seed!(42)
m = 10
n = 8
A = randn(m,n)
#A = Matrix(1.0I,m,n)
r = 8
myf = x -> matrix_approx_function(x, A, r)
myg! = (storage, x) -> matrix_approx_gradient!(storage, x, A, r)
soln = optimize(myf, myg!, ones(m*r+n*r), BFGS(), Optim.Options(f_tol = 1e-10))
#soln = optimize(myf, myg!, randn(m*r+n*r), BFGS(), Optim.Options(f_tol = 1e-8))
x = Optim.minimizer(soln)
@show soln
Uopt = reshape(x[1:m*r], m, r)
Vopt = reshape(x[(m*r+1):end], n, r)
objval = 2*myf(x)   # myf is 0.5*norm(A - U*V')^2, so doubling gives the squared error
opterr = norm(A-Uopt*Vopt')^2
Usvd,Ssvd,Vsvd = svd(A)
svderr = norm(A-Usvd[:,1:r]*Diagonal(Ssvd[1:r])*Vsvd[:,1:r]')^2
@show objval
@show opterr
@show svderr
; # hide final output in JuliaBox
soln =  * Status: success

 * Candidate solution
    Final objective value:     1.182346e-17

 * Found with
    Algorithm:     BFGS

 * Convergence measures
    |x - x'|               = 3.92e-09 ≰ 0.0e+00
    |x - x'|/|x'|          = 7.09e-10 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.05e-16 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 8.90e+00 ≰ 1.0e-10
    |g(x)|                 = 9.16e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:  0  (vs limit Inf)
    Iterations:   130
    f(x) calls:   317
    ∇f(x) calls:  317

objval = 2.364691437144303e-17
opterr = 2.364691437144303e-17
svderr = 1.2555705456793983e-28
Uopt
10×8 Matrix{Float64}:
 -3.33735   -2.19524   -2.53906   …  -2.54182   -2.80942  -2.89024
 -3.7234    -4.18208   -3.68367      -4.93599   -4.45906  -3.56531
 -0.983073   0.348816   0.849748      0.163245   0.238055  0.727
  4.038      4.99166    4.58689       4.27451    5.5284    5.27877
  0.564552  -0.613908   0.332522      0.142497   0.33654   0.710277
  3.02217    1.44327    1.31406    …  1.95573    1.31999   2.43178
  0.124511   0.702701   1.5509        1.36139    0.167772  0.747722
  3.01424    5.12274    3.71072       4.67978    4.09678   3.2948
  4.45295    4.73057    3.90561       2.9314     3.33936   3.18532
  2.91507    2.16179    2.13676       2.62318    2.44848   2.30038
Vopt
8×8 Matrix{Float64}:
  0.0924827  -0.854372  -0.565314  …   0.292707    0.617051   0.411362
  0.387538   -0.40823    0.206447      0.126999   -0.539906   0.815118
 -0.0532201  -0.720962   0.800373     -0.129858   -0.154867   0.60556
 -0.785825    0.095415   0.646872     -0.219982   -0.0859667  0.880899
 -0.331012    0.113304   0.142373      0.27635     0.258298   0.47644
  0.540078    0.236838  -0.365907  …  -1.13269     0.55546    0.218527
  0.0694765  -0.404263   0.447314     -0.558633   -0.38241   -0.427689
  0.415032    0.234113   0.189276     -0.0432092   0.366161   0.52138
Usvd
10×8 Matrix{Float64}:
  0.259466   -0.381811   -0.275723   …   0.144914   -0.151181   0.724038
  0.435111    0.233448    0.0830214     -0.420493   -0.475092  -0.00147801
 -0.0288846   0.608252   -0.539545      -0.0986429   0.292082   0.0335627
 -0.513671    0.333858   -0.0538174      0.258408   -0.675488   0.133151
 -0.0566207  -0.0354408   0.205559       0.279143    0.186201   0.251128
 -0.177233    0.261441    0.413036   …  -0.503843    0.120149   0.562817
 -0.155586   -0.379952   -0.381767      -0.596319   -0.115871  -0.11921
 -0.41661    -0.154051   -0.401148      -0.033717    0.0310957  0.148548
 -0.417507   -0.0833247   0.192219      -0.173909    0.305561   0.0334984
 -0.26826    -0.271378    0.258094      -0.0832108  -0.230149  -0.199207
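# Note that Uopt looks nothing like Usvd even though both fits are essentially
# exact. That's expected: a rank-r factorization is only determined up to an
# invertible r-by-r transform G, since U*V' == (U*G)*(V*inv(G)')'. A quick
# check that the two products nonetheless agree (all names from the cells above):
@show norm(Uopt*Vopt' - Usvd[:,1:r]*Diagonal(Ssvd[1:r])*Vsvd[:,1:r]')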