## Lecture 5, sparse matrix operations.
## Load the Candyland matrix.
using DelimitedFiles
using SparseArrays
data = readdlm("candyland-matrix.csv",',') # each CSV row is an (i, j, value) triplet
TI = Int.(data[:,1]) # row indices of the nonzeros
TJ = Int.(data[:,2]) # column indices of the nonzeros
TV = data[:,3]       # the nonzero values themselves
T = sparse(TI,TJ, TV, 140,140) # assemble triplets into a 140x140 CSC sparse matrix
coords = readdlm("candyland-coords.csv",',') # (x, y) board coordinates, one row per cell
x = coords[:,1]
y = coords[:,2]
cells = Int.(readdlm("candyland-cells.csv",','))
start,dest = cells[1:2] # start and destination cell indices of the game board
## Let's see the matrix we had in class
C = sparse([0 1 0 0 1; 2 0 0 6 0; 0 0 0 0 3]) # dense literal converted to CSC storage
@show C.colptr # column pointers: colptr[j]:colptr[j+1]-1 indexes column j's nonzeros
@show C.rowval # row index of each stored nonzero, in column-major order
@show C.nzval  # the stored nonzero values, aligned with rowval
##
sum(T,dims=1) # column sums of T; for a stochastic transition matrix these should be ~1
##
findall(vec(sum(T,dims=1)) .< 0.9) # columns whose sum is well below 1 — presumably absorbing/terminal cells; verify against the game model
## Let's see how Julia stores things
typeof(T)
## What can we do with T?
fieldnames(typeof(T))
(:m, :n, :colptr, :rowval, :nzval) # NOTE(review): this is pasted REPL output, not code — it evaluates to a harmless tuple literal
##
T.colptr
##
T.nzval
##
T.rowval
## Why sparse matrices?
# The first reason is memory!
print(varinfo(r"T")) # memory footprint of the sparse matrix T
A = Array(T)         # densify: stores all 140*140 entries, zeros included
print(varinfo(r"A")) # so it takes about 20% of the memory!
## The second reason is speed as you'll see on the homework.
## The downside is that some operations need a lot more care.
## Let's write our own matvec given the raw arrays
""" My goal: compute T*x where we just use the
index arrays for coordinate/index/triplet storage
of the non-zero data in T.
TI, TJ, TV: the triplet arrays for T.
x: the vector for T*x (should have at least n entries
for the n columns of T)
m: the number of rows
"""
function mymatvec(TI,TJ,TV,x,m)
y = zeros(m) # allocate output
for nzi in 1:length(TI) # for each entry in T
i = TI[nzi]
j = TJ[nzi]
v = TV[nzi]
y[i] += v*x[j]
end
# A fancy way
#for (i,j,v) in zip(TI,TJ,TV)
# y[i] += v*x[j]
# end
return y
end
x = randn(140) # random test vector
mymatvec(TI,TJ,TV,x,140) == T*x # NOTE(review): exact == on floats works here, but summation-order differences could break it; isapprox is safer
##
y = mymatvec(TI,TJ,TV,x,140)
z = T*x
##
## Let's see how Julia stores things
typeof(T)
## What can we do with T?
fieldnames(typeof(T))
##
T.colptr
##
T.nzval
##
T.rowval
##
""" This does a matvec y=T*x using CSC arrays instead.
colptr,rowval,nzval: CSC arrays for a matrix T
x: the vector with at least n columns
m: the number of rows so we can allocate the output.
"""
function cscmatvec(colptr,rowval,nzval,x,m)
y = zeros(m) # allocate output
for j=1:length(colptr)-1 # for each column ...
for nzi=colptr[j]:colptr[j+1]-1 # for each entry in the column
i = rowval[nzi]
v = nzval[nzi]
y[i] += v*x[j] # same as indexed form!
end
end
return y
end
cscmatvec(T.colptr,T.rowval,T.nzval,x,140) == T*x # the CSC loop reproduces Julia's built-in matvec
## Which one is faster? (If any) (Question from class)
using BenchmarkTools
@btime mymatvec(TI,TJ,TV,x,140)
@btime cscmatvec(T.colptr,T.rowval,T.nzval,x,140)
## Note that getting a random column in T is fast
# Note, run this a few times, the first time
# in Julia tends to be unreliable.
# Column slicing reads a contiguous range of the CSC arrays.
@btime T[:,(rand(1:size(T,2)))]
## But getting a random row in T is slow
# Row slicing must search every column for a matching row index.
@btime T[rand(1:size(T,1)),:]
## Okay, maybe we need a bigger matrix
S = sprandn(100000,100000,10/100000) # ~10 expected nonzeros per column
##Note that getting a random column in S is fast
# Bug fix: the index range used size(T,2) (=140), so the benchmark only
# ever touched the first 140 columns of S; sample over all of S instead.
@btime S[:,rand(1:size(S,2))]
## But getting a random row in S is slow
# Bug fix: same issue — was rand(1:size(T,1)); use S's row count.
@btime S[rand(1:size(S,1)),:]
## Also note that there is NO way we could possibly store S with dense
length(S)*8/1e9 # it would take 80GB of memory, which is 10x my Mac.
## If you haven't compiled an operation before, it can take a while to run
@time S[:,UInt32(rand(1:size(S,2)))] # first call with a UInt32 index triggers compilation of a new method specialization
##
@time S[:,UInt32(rand(1:size(S,2)))] # second call reuses the compiled code, so it is much faster
## Back to uint64
@time S[:,UInt64(rand(1:size(S,2)))]
##
@btime S[:,Int32(rand(1:size(S,2)))] # @btime runs many samples, so compile time is excluded
##
@btime S[:,Int32(rand(1:size(S,2)))]
## And we can go bigger
S = sprandn(10000000,10000000,10/10000000) # ~10 expected nonzeros per column
##Note that getting a random column in S is fast
# Bug fix: was rand(1:size(T,2)) (=140), which sampled only the first
# 140 columns of the 10M-column matrix; sample over all of S.
@time S[:,rand(1:size(S,2))]
## But getting a random row in S is slow
# Bug fix: same issue — was rand(1:size(T,1)); use S's row count.
@time S[rand(1:size(S,1)),:]