# RCRP for Multinomial likelihood
## Scenario
| | k = 1 | k = 2 | k = 3 | k = 4 | k = 5 | k = 6 |
|---|---|---|---|---|---|---|
| Epoch 1 | O | O | O | | | |
| Epoch 2 | O | O | O | O | | |
| Epoch 3 | | O | O | O | O | |
| Epoch 4 | | | O | O | O | O |
| Epoch 5 | | | O | O | O | O |
# --- Ground-truth configuration of the synthetic data set -------------------
true_KK = 6               # number of ground-truth topics (rows of true_topics)
TT = 5                    # number of epochs
N_t = fill(500, TT)       # number of sentences generated in each epoch
n_tokens = 25             # tokens drawn per sentence
vocab_size = 9

# One row per topic (rows are indexed by topic below).
# NOTE(review): the meaning of the trailing 0.0 argument is defined by
# BIAS.gen_bars and is not visible here -- confirm before changing it.
true_topics = BIAS.gen_bars(true_KK, vocab_size, 0.0)

# Per-epoch containers, filled in by the generation loop.
xx = Array(Vector{Sent}, TT)          # sentences of each epoch
true_zz = Array(Vector{Int}, TT)      # true topic label of every sentence
true_nn = zeros(Int, TT, true_KK)     # true sentence count per (epoch, topic)

# Topics that are active in each epoch; this mirrors the scenario table.
epoch_chains = Array(Vector{Int}, TT)
for (tt, chain) in enumerate(([1, 2, 3],
                              [1, 2, 3, 4],
                              [2, 3, 4, 5],
                              [3, 4, 5, 6],
                              [3, 4, 5, 6]))
    epoch_chains[tt] = chain
end
# Generate the corpus epoch by epoch: every sentence picks one of the topics
# active in its epoch uniformly at random, then draws n_tokens tokens from
# that topic's row of true_topics.
for tt = 1:TT
    active = epoch_chains[tt]
    n_active = length(active)
    weights = ones(Float64, n_active) / n_active    # uniform over active topics

    sents = Array(Sent, N_t[tt])
    labels = ones(Int64, N_t[tt])
    for n = 1:N_t[tt]
        # sample(weights) yields a position within `active`; map it to the topic id.
        topic = active[sample(weights)]
        labels[n] = topic

        tokens = sample(true_topics[topic, :][:], n_tokens)
        sents[n] = BIAS.sparsify_sentence(tokens)

        true_nn[tt, topic] += 1    # keep the ground-truth counts in sync
    end

    xx[tt] = sents
    true_zz[tt] = labels
end
# Base measure handed to the RCRP.
# NOTE(review): presumably dd is the dimension (vocabulary size) and aa the
# Dirichlet concentration -- confirm against MultinomialDirichlet.
dd, aa = vocab_size, 1.0
q0 = MultinomialDirichlet(dd, aa)

# RCRP model over TT epochs, starting from init_KK components.
# NOTE(review): the roles of rcrp_aa / rcrp_a1 / rcrp_a2 are defined by the
# RCRP constructor, which is not visible here.
init_KK = 3
rcrp_aa = rcrp_a1 = rcrp_a2 = 1
rcrp = RCRP(q0, init_KK, rcrp_aa, TT, rcrp_a1, rcrp_a2)
# sampling

# Random initial assignment: each sentence of each epoch gets a component
# drawn uniformly from 1:rcrp.K.
zz = Vector{Int64}[rand(1:rcrp.K, N_t[tt]) for tt = 1:TT]

# Sampler settings.
# NOTE(review): the names suggest burn-in sweeps, thinning lag, number of kept
# samples, hyper-parameter resampling, inner iterations and checkpoint period;
# confirm against collapsed_gibbs_sampler!.
n_burnins = 100
n_lags = 3
n_samples = 400
sample_hyperparam = true
n_internals = 10
store_every = 10000
filename = "demo_RCRP_MultinomialDirichlet"

nn, components, zz2table = collapsed_gibbs_sampler!(
    rcrp, xx, zz,
    n_burnins, n_lags, n_samples,
    sample_hyperparam, n_internals, store_every, filename)
## Result
julia> zz2table
5x8 Array{Int64,2}:
 0  1  2  3  0  0  0  0
 0  1  2  3  4  0  0  0
 0  1  2  0  3  4  0  0
 0  0  1  0  2  3  4  0
 1  0  4  0  2  6  3  5
julia> nn
5x8 Array{Int64,2}:
   0  162  170  168    0    0    0  0
   0  136  123  120  121    0    0  0
   0  122  127    0  108  143    0  0
   0    0  133    0  125  131  111  0
 126    0  115    0  118    6  133  2
julia> true_nn
5x6 Array{Int64,2}:
 168  162  170    0    0    0
 120  136  123  121    0    0
   0  122  127  108  143    0
   0    0  133  125  131  111
   0    0  115  118  132  135