# Tabulate the Poisson pmf dpois(x, n) over x = 0..20 for the means
# n = 1, 3, 5, 7, 9, then draw one bar chart per mean on a single page.
# NOTE(review): assumes tibble, dplyr, ggplot2 and gridExtra are attached
# earlier in the document -- confirm.
pdata <- tibble(
  x = seq(0, 20)
)
# NOTE(review): `plts` and `i` are not read anywhere in the visible code;
# kept in case a later chunk depends on them.
plts <- list()
i <- 1
for (n in seq(1, 10, 2)) {
  cname <- paste0("y", n)
  # Glue-style name on the left of `:=` creates the column y<n>.
  pdata <- pdata |> mutate("{cname}" := dpois(x, n))
}
p1 <- ggplot(data = pdata, aes(x = x)) +
  geom_col(aes(y = y1)) +
  ggtitle("mean=1")
p3 <- ggplot(data = pdata, aes(x = x)) +
  geom_col(aes(y = y3)) +
  ggtitle("mean=3")
p5 <- ggplot(data = pdata, aes(x = x)) +
  geom_col(aes(y = y5)) +
  ggtitle("mean=5")
p7 <- ggplot(data = pdata, aes(x = x)) +
  geom_col(aes(y = y7)) +
  ggtitle("mean=7")
p9 <- ggplot(data = pdata, aes(x = x)) +
  geom_col(aes(y = y9)) +
  ggtitle("mean=9")
# Lay the five charts out together in one figure.
grid.arrange(p1, p3, p5, p7, p9)
HW2 - Comments on Poisson
Fundamentals of Data Science
The Poisson Distribution
The Poisson distribution gives the probability of \(k\) events occurring in a given time interval, assuming:
- the interarrival times are independent of one another
- the mean number of arrivals in any interval of that length is a constant \(\lambda\).
The Exponential Distribution
The exponential distribution is a continuous probability distribution on \(x \ge 0\) with density \(f(x) = \lambda e^{-\lambda x}\), where \(\lambda > 0\) is a fixed rate.
# Exponential density with rate 1, evaluated on a grid from 0 to 3 in
# steps of 0.01 and drawn as a line.
# NOTE(review): assumes ggplot2 is attached earlier in the document.
x <- seq(0, 3, .01)
y <- dexp(x, 1)
ggplot() +
  geom_line(aes(x = x, y = y)) +
  ggtitle("Exponential Distribution with parameter 1")
Poisson Process
In a “Poisson Process”, events occur randomly in time. The interval between two consecutive events is chosen (independently) from an exponential distribution.
# Simulate 100 arrivals: draw 100 inter-arrival gaps from an exponential
# distribution with rate 1; their running sum gives the arrival times.
# No seed is set here, so each run produces a different sample.
# NOTE(review): assumes ggplot2 is attached earlier in the document.
t <- rexp(100, 1)
arrivals <- cumsum(t)
# One thin bar of height 1 at each arrival time.
ggplot() +
  geom_col(aes(x = arrivals, y = 1), width = .015, color = "black") +
  ggtitle("Arrival Times in a Poisson Process")
# Count arrivals in the windows (0, 25), (0, 50) and (25, 50).
# The `#>` lines are the output recorded for one particular random sample;
# a fresh run of rexp() will generally print different counts.
sum(arrivals < 25)
#> [1] 32
sum(arrivals < 50)
#> [1] 50
sum(arrivals > 25 & arrivals < 50)
#> [1] 18