## Lecture 4, sparse matrix operations.
#= We are going to look at the Candyland matrix!
See some analysis online:
* <http://www.lscheffer.com/CandyLand.htm>
* <http://datagenetics.com/blog/december12011/index.html>
We are studying a slightly different set of rules
with respect to the sticky states. In one set
of rules (on those pages), a sticky state just holds
your character deterministically for one step. In our
set of rules, a sticky state holds you until you draw
a specific color. So if you see some differences,
that is why.
=#
## We have built the Markov transition matrix for you!
using DelimitedFiles
using SparseArrays
# Read the transition matrix in coordinate (I, J, V) triplet form from CSV.
data = readdlm("candyland-matrix.csv",',')
TI = Int.(data[:,1])  # row indices of the nonzeros
TJ = Int.(data[:,2])  # column indices of the nonzeros
TV = data[:,3]        # transition probabilities
# Assemble the 140×140 sparse transition matrix; T[i,j] is the
# probability of moving from state j to state i (columns sum to 1,
# except the destination column — see the checks in the next cells).
T = sparse(TI,TJ, TV, 140,140)
# Board coordinates of each cell (for plotting).
coords = readdlm("candyland-coords.csv",',')
x = coords[:,1]
y = coords[:,2]
# Special cells: the first two entries are the start and destination states.
cells = Int.(readdlm("candyland-cells.csv",','))
start,dest = cells[1:2]
2-element Vector{Int64}:
140
134
##
sum(T,dims=1)
1×140 Matrix{Float64}:
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 0.0 1.0 1.0 1.0 1.0 1.0 1.0
##
findall(vec(sum(T,dims=1)) .< 0.9)
1-element Vector{Int64}:
134
## Let's see how Julia stores things
typeof(T)
SparseMatrixCSC{Float64, Int64}
## What can we do with T?
fieldnames(typeof(T))
(:m, :n, :colptr, :rowval, :nzval)
(:m, :n, :colptr, :rowval, :nzval)
##
T.colptr
141-element Vector{Int64}:
1
19
37
55
73
74
92
110
128
146
164
182
200
⋮
2210
2220
2229
2237
2244
2244
2245
2246
2249
2252
2255
2273
##
T.nzval
2272-element Vector{Float64}:
0.09375
0.09375
0.09375
0.09375
0.09375
0.09375
0.0625
0.015625
0.0625
0.0625
0.046875
0.046875
0.0625
⋮
0.0625
0.0625
0.015625
0.0625
0.0625
0.046875
0.046875
0.015625
0.015625
0.015625
0.015625
0.015625
##
T.rowval
2272-element Vector{Int64}:
2
3
4
5
6
7
8
9
10
11
12
13
14
⋮
7
8
9
10
11
12
13
20
42
69
92
102
## Why sparse matrices?
# The first reason is memory!
print(varinfo(r"T"))
A = Array(T)
print(varinfo(r"A")) # so it takes about 10% of the memory!
| name | size | summary |
|:---- | ----------:|:---------------------------------------------------------------- |
| T | 36.758 KiB | 140×140 SparseMatrixCSC{Float64, Int64} with 2272 stored entries |
| TI | 17.789 KiB | 2272-element Vector{Int64} |
| TJ | 17.789 KiB | 2272-element Vector{Int64} |
| TV | 17.789 KiB | 2272-element Vector{Float64} |
| name | size | summary |
|:---- | -----------:|:----------------------- |
| A | 153.164 KiB | 140×140 Matrix{Float64} |
## The second reason is speed as you'll see on the homework.
## The downside is that some operations need a lot more care.
## Let's write our own matvec given the raw arrays
"""
    mymatvec(TI, TJ, TV, x, m)

Compute the matrix-vector product `y = A*x` where `A` is given in
coordinate (triplet) form: `A[TI[k], TJ[k]] = TV[k]` for each `k`.
`m` is the number of rows of `A`; duplicate `(i, j)` pairs accumulate.
"""
function mymatvec(TI,TJ,TV,x,m)
    y = zeros(m)
    # Walk every stored triplet once and accumulate its contribution.
    for (i, j, v) in zip(TI, TJ, TV)
        y[i] += v * x[j]
    end
    return y
end
x = randn(140)
mymatvec(TI,TJ,TV,x,140) == T*x
true
##
y = mymatvec(TI,TJ,TV,x,140)
z = T*x
140-element Vector{Float64}:
-0.05009905647012144
-0.22061218649942774
-0.09376967937123634
-0.30269272998328123
-0.4142411662777157
-0.4142411662777157
-0.35015022066581003
-0.28714951826734675
-0.017238332370831402
-0.42714734603389765
-0.42639454696999557
-0.34917341288596915
-0.27575108695768524
⋮
-0.14914329410221044
-0.23488323223120677
-0.19659438969249066
-0.21572021910896808
-0.14924061007912873
-0.29842399595249375
0.24772616108499199
0.46937962973020614
-0.7194342688800975
4.617057342353504
-0.25827164988195656
0.0
##
## Let's see how Julia stores things
typeof(T)
SparseMatrixCSC{Float64, Int64}
## What can we do with T?
fieldnames(typeof(T))
(:m, :n, :colptr, :rowval, :nzval)
##
T.colptr
141-element Vector{Int64}:
1
19
37
55
73
74
92
110
128
146
164
182
200
⋮
2210
2220
2229
2237
2244
2244
2245
2246
2249
2252
2255
2273
##
T.nzval
2272-element Vector{Float64}:
0.09375
0.09375
0.09375
0.09375
0.09375
0.09375
0.0625
0.015625
0.0625
0.0625
0.046875
0.046875
0.0625
⋮
0.0625
0.0625
0.015625
0.0625
0.0625
0.046875
0.046875
0.015625
0.015625
0.015625
0.015625
0.015625
##
T.rowval
2272-element Vector{Int64}:
2
3
4
5
6
7
8
9
10
11
12
13
14
⋮
7
8
9
10
11
12
13
20
42
69
92
102
##
"""
    cscmatvec(colptr, rowval, nzval, x, m)

Matrix-vector product `y = A*x` for an `A` with `m` rows stored in
compressed sparse column (CSC) form: the nonzeros of column `j` live at
positions `colptr[j]:colptr[j+1]-1` of `rowval`/`nzval`.
"""
function cscmatvec(colptr,rowval,nzval,x,m)
    y = zeros(m)
    ncol = length(colptr) - 1
    for col in 1:ncol
        xc = x[col]  # the single x entry this column multiplies
        for p in colptr[col]:(colptr[col+1] - 1)
            y[rowval[p]] += nzval[p] * xc
        end
    end
    return y
end
cscmatvec(T.colptr,T.rowval,T.nzval,x,140) == T*x
true
## Note that getting a random column in T is fast
# Note, run this a few times, the first time
# in Julia tends to be unreliable.
@time T[:,(rand(1:size(T,2)))]
0.033037 seconds (26.12 k allocations: 1.750 MiB, 99.78% compilation time)
140-element SparseVector{Float64, Int64} with 17 stored entries:
[9 ] = 0.015625
[20 ] = 0.015625
[42 ] = 0.015625
[69 ] = 0.015625
[92 ] = 0.015625
[102] = 0.015625
[124] = 0.09375
[125] = 0.09375
[126] = 0.09375
[127] = 0.09375
[128] = 0.09375
[129] = 0.09375
[130] = 0.0625
[131] = 0.046875
[132] = 0.046875
[133] = 0.0625
[134] = 0.125
## But getting a random row in T is slow
@time T[rand(1:size(T,1)),:]
0.024496 seconds (18.01 k allocations: 1.218 MiB, 99.79% compilation time)
140-element SparseVector{Float64, Int64} with 11 stored entries:
[43] = 0.0625
[44] = 0.0625
[45] = 0.0625
[46] = 0.0625
[48] = 0.0625
[49] = 0.09375
[50] = 0.09375
[51] = 0.09375
[52] = 0.09375
[53] = 0.09375
[54] = 0.09375
## Okay, maybe we need a bigger matrix
S = sprandn(100000,100000,10/100000)
100000×100000 SparseMatrixCSC{Float64, Int64} with 1000524 stored entries:
⎡⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎤
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎣⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎦
## Note that getting a random column in S is fast
# Fix: sample from S's own columns (size(S,2)), not T's 140 — the old
# code only ever touched the first 140 columns of the 100000-column S.
@time S[:,rand(1:size(S,2))]
0.000012 seconds (4 allocations: 288 bytes)
100000-element SparseVector{Float64, Int64} with 6 stored entries:
[5492 ] = -0.73382
[21669] = -0.812271
[46908] = -0.210555
[52075] = 0.0483097
[72567] = -0.0525567
[86414] = -0.102364
## But getting a random row in S is slow
# Fix: sample from S's own rows (size(S,1)), not T's 140 — the old
# code only ever touched the first 140 rows of the 100000-row S.
@time S[rand(1:size(S,1)),:]
0.000616 seconds (8 allocations: 1024 bytes)
100000-element SparseVector{Float64, Int64} with 11 stored entries:
[299 ] = 0.345405
[8679 ] = -0.123843
[18970] = -0.228814
[24782] = 1.02155
[30110] = -0.637553
[44628] = 0.212392
[56864] = 0.706521
[82412] = 0.93487
[89347] = -1.74611
[90550] = -0.353132
[94031] = -0.818578
## Also note that there is NO way we could possibly store S with dense
length(S)*8/1e9 # it would take 80GB of memory, which is 10x my Mac.
80.0
## And we can go bigger
S = sprandn(10000000,10000000,10/10000000)
10000000×10000000 SparseMatrixCSC{Float64, Int64} with 100013479 stored entries:
⎡⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎤
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎢⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎥
⎣⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎦
## Note that getting a random column in S is fast
# Fix: sample from S's own columns (size(S,2)), not T's 140 — the old
# code only ever touched the first 140 columns of the 10M-column S.
@time S[:,rand(1:size(S,2))]
0.000014 seconds (4 allocations: 384 bytes)
10000000-element SparseVector{Float64, Int64} with 12 stored entries:
[32841 ] = 1.07611
[199734 ] = -0.433621
[1822585] = 0.95794
[2139478] = -0.604192
[2547398] = -0.0881245
[3136716] = 0.788702
[3236962] = -1.83681
[3300971] = -0.110756
[4834678] = -1.52597
[7589493] = 0.313444
[7794805] = -1.61942
[9230011] = -0.144024
## But getting a random row in S is slow
# Fix: sample from S's own rows (size(S,1)), not T's 140 — the old
# code only ever touched the first 140 rows of the 10M-row S.
@time S[rand(1:size(S,1)),:]
0.058894 seconds (6 allocations: 352 bytes)
10000000-element SparseVector{Float64, Int64} with 7 stored entries:
[380418 ] = 0.41565
[419894 ] = -1.74231
[3724358] = -1.61303
[4332260] = -1.60416
[5362837] = -0.891753
[8527827] = 0.988961
[9115564] = 0.303262