This project provides insights into Apple’s stock performance by analyzing historical trading data, including trends in monthly average closing prices, the distribution of trading days across weekdays, and the applicability of the Central Limit Theorem through sampling methods.
The Apple stock dataset captures daily trading information for Apple Inc. (AAPL) from 1980 to 2025: opening and closing prices, highs, lows, adjusted closing prices, and trading volume. Derived metrics such as daily returns are computed from these columns during the analysis, enabling a comprehensive look at stock performance, volatility, and market trends.
# Load necessary libraries
library(tidyverse)
library(lubridate)
library(plotly)
# Read the raw AAPL price history (path is relative to the working directory)
# and parse the text dates held in column `X` into a proper Date column.
# `%y` is a two-digit year: R reads 69-99 as 19xx and 00-68 as 20xx.
apple_stock <- read.csv("data/apple_stock.csv", stringsAsFactors = FALSE) %>%
  mutate(Date = as.Date(X, format = "%m/%d/%y"))

# Count missing cells across the whole data frame. Date strings that
# as.Date() could not parse become NA, so they would show up here too.
sum(is.na(apple_stock))
## [1] 0
# Add the weekday name of each trading date and the simple daily return,
# i.e. the close-to-close change relative to the previous row's close.
# The first row has no previous close, so its Return is NA.
apple_stock <- apple_stock %>%
  mutate(
    DayOfWeek = weekdays(Date),
    Return = (Close - dplyr::lag(Close)) / dplyr::lag(Close)
  )

# Show the first rows to confirm the new columns.
print(head(apple_stock))
## X Adj.Close Close High Low Open Volume Date
## 1 12/12/80 0.09883447 0.128348 0.128906 0.128348 0.128348 469033600 1980-12-12
## 2 12/15/80 0.09367821 0.121652 0.122210 0.121652 0.122210 175884800 1980-12-15
## 3 12/16/80 0.08680241 0.112723 0.113281 0.112723 0.113281 105728000 1980-12-16
## 4 12/17/80 0.08895087 0.115513 0.116071 0.115513 0.115513 86441600 1980-12-17
## 5 12/18/80 0.09152976 0.118862 0.119420 0.118862 0.118862 73449600 1980-12-18
## 6 12/19/80 0.09711570 0.126116 0.126674 0.126116 0.126116 48630400 1980-12-19
## DayOfWeek Return
## 1 Friday NA
## 2 Monday -0.05217061
## 3 Tuesday -0.07339788
## 4 Wednesday 0.02475091
## 5 Thursday 0.02899246
## 6 Friday 0.06102867
# One-row summary table of the daily returns: minimum, quartiles, mean and
# maximum. NA returns (the first row) are excluded from every statistic.
sum_data <- with(
  apple_stock,
  data.frame(
    Min    = min(Return, na.rm = TRUE),
    Q1     = quantile(Return, 0.25, na.rm = TRUE),
    Median = median(Return, na.rm = TRUE),
    Mean   = mean(Return, na.rm = TRUE),
    Q3     = quantile(Return, 0.75, na.rm = TRUE),
    Max    = max(Return, na.rm = TRUE)
  )
)

# Replace the default row label with a descriptive one, then display.
rownames(sum_data) <- "Daily Return Summary"
sum_data
## Min Q1 Median Mean Q3
## Daily Return Summary -0.518692 -0.01262467 0 0.001072572 0.01437868
## Max
## Daily Return Summary 0.3322805
# Central Limit Theorem demonstration: draw 1000 samples of 30 daily returns
# (with replacement) and keep the mean of each sample.
set.seed(123)  # fixed seed so the simulated means are reproducible
sample_means <- vapply(
  seq_len(1000),
  function(i) {
    # NOTE: Return still contains the leading NA produced by lag(), so a draw
    # can occasionally hold fewer than 30 usable values; na.rm drops the NA.
    drawn <- sample(apple_stock$Return, size = 30, replace = TRUE)
    mean(drawn, na.rm = TRUE)
  },
  numeric(1)
)

# Histogram of the sample means, rendered as an interactive plotly widget.
clt_data <- data.frame(SampleMean = sample_means)
p_clt <- ggplot(clt_data, aes(x = SampleMean)) +
  geom_histogram(bins = 30, fill = "lightblue", color = "black") +
  labs(
    title = "Sampling Distribution of Daily Return Means",
    x = "Sample Mean",
    y = "Frequency"
  ) +
  theme_minimal()
ggplotly(p_clt)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.1781602 -0.0125522 0.0000000 0.0006311 0.0125094 0.1929153
# Histogram of the returns in `random_sample`, shown as an interactive plot.
# NOTE(review): `random_sample` must already exist when this runs; it is not
# created in the visible part of the script, so confirm where it is defined.
p_random <- ggplot(random_sample, aes(x = Return)) +
  geom_histogram(bins = 30, fill = "lightgreen", color = "black") +
  labs(
    title = "Histogram of Random Sample Returns",
    x = "Return",
    y = "Frequency"
  ) +
  theme_minimal()
ggplotly(p_random)
# Stratified sample: tag each row with its calendar year, then draw 10% of
# the rows within every year so each year is represented proportionally.
apple_stock <- apple_stock %>%
  mutate(Year = year(Date))

stratified_sample <- apple_stock %>%
  group_by(Year) %>%
  sample_frac(0.1) %>%
  # Drop the grouping once sampling is done so later verbs applied to
  # `stratified_sample` are not silently computed per year.
  ungroup()

# Five-number summary (plus mean) of the sampled daily returns.
summary(stratified_sample$Return)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.1781602 -0.0142297 0.0000000 -0.0001117 0.0142848 0.2370346
# Label every row with the first day of its month so rows can be grouped
# by calendar month.
apple_stock <- apple_stock %>%
  mutate(Month = floor_date(Date, unit = "month"))

# Average closing price per month.
monthly_avg <- apple_stock %>%
  group_by(Month) %>%
  summarise(Avg_Close = mean(Close, na.rm = TRUE))

# Line chart of the monthly averages with one x-axis tick per year; the
# labels are rotated so the many year ticks stay readable.
p_monthly <- ggplot(monthly_avg, aes(x = Month, y = Avg_Close)) +
  geom_line(color = "purple") +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(
    title = "Monthly Average Closing Prices",
    x = "Month",
    y = "Average Close"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(p_monthly)