library(purrr)
library(tidyverse)
library(ggplot2)
library(knitr)
Load packages
Imaging we want to draw balls from an Urn. Balls are with different colors. Each color has a ‘weight’. When we draw a ball, a given ball is chosen with probability equal to (Weight of that ball)/(total weight of all balls in the Urn).
Now there are two red balls and one black ball in the urn. If we draw a nonblack ball, we return that ball to urn along with another ball with same color. If we draw a black ball, we return that ball to urn along with another ball that has a color that has not appeared in the urn. Repeat ball drawing process until we have 50 nonblack balls. We repeat this process several times. Assuming all nonblack balls have weight one. Now we want to know the expected number of differnet non black colors in the urn at the end, and the distribution of the numbers of nonblack colors at the end.
First, I wrote an urn function which takes four arguements. First one is the number of colors in the urn at start, second one is the initial number of balls in the urn, the third one represents the number of nonblack balls in the end, and the last one is the weight of the black ball.
Urn problem wiki page link
# set the random number seed
set.seed(16)
# Now write a function to simulate the Urn model
<- function(NumberOfColorsInUrnAtStart, InitialNBalls, NonBlackBalls, weightofblackball) {
UrnSim # set up the initial state of the urn
<- rep(NA, NonBlackBalls + 1)
Urn <- NumberOfColorsInUrnAtStart
NumberOfColorsUsed
# we will start with three balls: two "red" and one "black"
# black = 0 and red = 1
1] <- 0
Urn[2] <- 1
Urn[3] <- 1
Urn[
# set up a counter (NumberOfBalls) to keep track of how many balls we have
<- sum(Urn >= 0, na.rm = TRUE)
NumberOfBalls
# set-up a loop that pulls a ball from the urn and takes the appropriate action
while (NumberOfBalls < (NonBlackBalls + 1)) {
# set the probability of draw each ball
<- c(
myprob / (weightofblackball + NumberOfBalls - 1),
weightofblackball rep(1 / (weightofblackball + NumberOfBalls - 1), NumberOfBalls - 1)
)
# draw a ball (WhichBall)
<- Urn[sample(1:NumberOfBalls, size = 1, prob = myprob)]
WhichBall
# if draw a black ball
if (WhichBall == 0) {
<- Urn[sample(2:NumberOfBalls, 1)]
WhichBall_nonblack # return the ball and change the one's color
# the number of color that we have used should be increased
# but it does not necessarily mean that the number of colors in our urn has increased
<- NumberOfColorsUsed + 1
NumberOfColorsUsed # put back that ball with changed color
<- NumberOfColorsUsed
Urn[NumberOfBalls] # the number of balls did not change
<- NumberOfBalls
NumberOfBalls else {
} # draw a ball which is not black (whichBall)
# return the ball and add another one like it
+ 1] <- WhichBall
Urn[NumberOfBalls # increase the counter of how many balls we have in the urn
<- NumberOfBalls + 1
NumberOfBalls
}
}<- length(unique(Urn)) - 1
Numberofnonblackcolors <- max(table(Urn))
Numberofballsofcommonestcolor <- table(Urn)
distribution list(
Numberofnonblackcolors = Numberofnonblackcolors,
Numberofballsofcommonestcolor = Numberofballsofcommonestcolor,
distribution = distribution
)
}
# test the function
UrnSim(2, 3, 50, 1)
$Numberofnonblackcolors
[1] 6
$Numberofballsofcommonestcolor
[1] 20
$distribution
Urn
0 1 3 4 5 6 7
1 13 20 1 7 5 4
<- c(1:10)
weight
<- weight %>% map(function(x) {
mylist <- 10000 # how many urns to simulate
NumTrials <- rep(0, NumTrials) # somewhere to put the results
TrialResults for (i in 1:length(TrialResults)) {
<- UrnSim(2, 3, 50, x)$Numberofnonblackcolors
TrialResults[i]
}mean(TrialResults)
})
<- data.frame(weight = c(1:10), Numberofnonblackcolors = mylist %>% unlist())
df
%>% kable() df
weight | Numberofnonblackcolors |
---|---|
1 | 3.9625 |
2 | 6.3618 |
3 | 8.4013 |
4 | 10.0654 |
5 | 11.6262 |
6 | 12.9812 |
7 | 14.3008 |
8 | 15.4330 |
9 | 16.4470 |
10 | 17.4720 |
ggplot(df, aes(x = weight, y = Numberofnonblackcolors)) +
geom_point() +
theme_minimal() +
geom_smooth(se = FALSE) +
scale_x_continuous(breaks = seq(1, 10, 1)) +
labs(title = "the expected number of different (non-black) colors in the Urn \n at the end as a function of the weight of the black ball", x = "weight of black ball", y = "different (non-black) colors \n (mean of 10000 simulations)")
Reuse
Citation
@online{xu2020,
author = {Keren Xu},
editor = {},
title = {Urn {Problem}},
date = {2020-05-01},
url = {https://xukeren.github.io//posts/2020-05-01-urn-problem},
langid = {en}
}