Apple Inc. Stock Analysis

This project provides insights into Apple’s stock performance by analyzing historical trading data: trends in monthly average closing prices, the distribution of trading days across weekdays, and a demonstration of the Central Limit Theorem by repeatedly sampling daily returns.

Apple Inc. Stock Dataset

The Apple stock dataset captures daily trading information for Apple Inc. (AAPL) from 1980 to 2025: opening and closing prices, highs, lows, adjusted closing prices, and trading volume. Metrics such as daily returns are not stored in the file; they are derived in the analysis below, enabling comprehensive analysis of stock performance, volatility, and market trends.

# Load necessary libraries
library(tidyverse)
library(lubridate)
library(plotly)

# Read the raw price history; the path is resolved against the working directory
apple_stock <- read.csv(file = "data/apple_stock.csv", stringsAsFactors = FALSE)

# Column `X` holds the trading date as month/day/two-digit-year text;
# parse it into a proper Date column
apple_stock <- mutate(apple_stock, Date = as.Date(X, format = "%m/%d/%y"))

# Total number of missing cells over the whole data frame
apple_stock %>%
  is.na() %>%
  sum()
## [1] 0

Derive New Variables:

# Derive per-day variables:
#   DayOfWeek - weekday name of each trading date (text, locale-dependent)
#   Return    - simple daily return: change in Close relative to the previous row
# lag() works purely on row order, so sort chronologically first; otherwise an
# unsorted CSV would silently produce returns between non-adjacent days.
# The earliest row has no predecessor, so its Return is NA.
apple_stock <- apple_stock %>%
  arrange(Date) %>%
  mutate(
    DayOfWeek = weekdays(Date),
    # dplyr::lag is spelled out because stats::lag does not shift plain
    # vectors and would be picked up if dplyr's version were ever masked
    Return = (Close - dplyr::lag(Close)) / dplyr::lag(Close)
  )

print(head(apple_stock))
##          X  Adj.Close    Close     High      Low     Open    Volume       Date
## 1 12/12/80 0.09883447 0.128348 0.128906 0.128348 0.128348 469033600 1980-12-12
## 2 12/15/80 0.09367821 0.121652 0.122210 0.121652 0.122210 175884800 1980-12-15
## 3 12/16/80 0.08680241 0.112723 0.113281 0.112723 0.113281 105728000 1980-12-16
## 4 12/17/80 0.08895087 0.115513 0.116071 0.115513 0.115513  86441600 1980-12-17
## 5 12/18/80 0.09152976 0.118862 0.119420 0.118862 0.118862  73449600 1980-12-18
## 6 12/19/80 0.09711570 0.126116 0.126674 0.126116 0.126116  48630400 1980-12-19
##   DayOfWeek      Return
## 1    Friday          NA
## 2    Monday -0.05217061
## 3   Tuesday -0.07339788
## 4 Wednesday  0.02475091
## 5  Thursday  0.02899246
## 6    Friday  0.06102867
# One-row summary table of the daily returns, ignoring missing values:
# minimum, lower quartile, median, mean, upper quartile and maximum
sum_data <- with(
  apple_stock,
  data.frame(
    Min = min(Return, na.rm = TRUE),
    Q1 = quantile(Return, probs = 0.25, na.rm = TRUE),
    Median = median(Return, na.rm = TRUE),
    Mean = mean(Return, na.rm = TRUE),
    Q3 = quantile(Return, probs = 0.75, na.rm = TRUE),
    Max = max(Return, na.rm = TRUE)
  )
)
# Replace the automatically generated row label with a descriptive one
row.names(sum_data) <- "Daily Return Summary"
sum_data
##                            Min          Q1 Median        Mean         Q3
## Daily Return Summary -0.518692 -0.01262467      0 0.001072572 0.01437868
##                            Max
## Daily Return Summary 0.3322805

Analyze Categorical and Numerical Variables

# Count trading days per weekday and show them as an interactive bar chart.
# DayOfWeek is plain text, so ggplot would otherwise sort the bars
# alphabetically (Friday, Monday, Thursday, ...). Reordering the labels by
# their Monday-first weekday number puts the bars in calendar order without
# hard-coding day names, so it keeps working whatever locale weekdays() used.
day_counts <- apple_stock %>%
  mutate(DayOfWeek = fct_reorder(DayOfWeek, wday(Date, week_start = 1))) %>%
  group_by(DayOfWeek) %>%
  summarise(Count = n())

p_cat <- ggplot(day_counts, aes(x = DayOfWeek, y = Count, fill = DayOfWeek)) +
  # geom_col() plots precomputed heights; it is the idiomatic form of
  # geom_bar(stat = "identity")
  geom_col() +
  labs(title = "Number of Trading Days by Day of Week", x = "Day", y = "Count") +
  theme_minimal()
ggplotly(p_cat)
# Interactive 30-bin histogram of the closing price over the full history
p_num <- apple_stock %>%
  ggplot(mapping = aes(x = Close)) +
  geom_histogram(bins = 30, fill = "steelblue", color = "black") +
  theme_minimal() +
  labs(
    title = "Distribution of Apple Closing Prices",
    x = "Closing Price",
    y = "Frequency"
  )
ggplotly(p = p_num)

Analyze the Relationship Between Numerical Variables

# Interactive scatter plot of each day's opening price against its closing
# price; points are semi-transparent so dense regions remain readable
p_bi <- apple_stock %>%
  ggplot(mapping = aes(x = Open, y = Close)) +
  geom_point(alpha = 0.5, color = "darkgreen") +
  theme_minimal() +
  labs(
    title = "Scatter Plot: Open vs. Close Prices",
    x = "Open Price",
    y = "Close Price"
  )
ggplotly(p = p_bi)

Examine the Distribution of a Numerical Variable:

# Interactive 30-bin histogram of the daily returns
p_return <- apple_stock %>%
  ggplot(mapping = aes(x = Return)) +
  geom_histogram(bins = 30, fill = "coral", color = "black") +
  theme_minimal() +
  labs(
    title = "Distribution of Daily Returns",
    x = "Daily Return",
    y = "Frequency"
  )
ggplotly(p = p_return)

Central Limit Theorem Demonstration:

# Central Limit Theorem demonstration: draw many fixed-size samples (with
# replacement) from the daily returns and look at the distribution of their
# means.
set.seed(123)  # fixed seed so the simulation is reproducible

# The first trading day has no previous close, so its Return is NA. Remove
# missing values BEFORE sampling: sampling first and dropping NAs inside
# mean() would leave some samples with fewer observations than intended.
clt_returns <- apple_stock$Return[!is.na(apple_stock$Return)]
n_replicates <- 1000
sample_size <- 30

sample_means <- replicate(
  n_replicates,
  mean(sample(clt_returns, size = sample_size, replace = TRUE))
)

# Interactive 30-bin histogram of the simulated sample means
clt_data <- data.frame(SampleMean = sample_means)
p_clt <- ggplot(clt_data, aes(x = SampleMean)) +
  geom_histogram(bins = 30, fill = "lightblue", color = "black") +
  labs(title = "Sampling Distribution of Daily Return Means", x = "Sample Mean", y = "Frequency") +
  theme_minimal()
ggplotly(p_clt)
# Simple random sample of 500 rows, then a numeric summary of its returns
random_sample <- sample_n(apple_stock, size = 500)
summary(random_sample[["Return"]])
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -0.1781602 -0.0125522  0.0000000  0.0006311  0.0125094  0.1929153
# Interactive 30-bin histogram of the returns in the simple random sample
p_random <- random_sample %>%
  ggplot(mapping = aes(x = Return)) +
  geom_histogram(bins = 30, fill = "lightgreen", color = "black") +
  theme_minimal() +
  labs(
    title = "Histogram of Random Sample Returns",
    x = "Return",
    y = "Frequency"
  )
ggplotly(p = p_random)
# Stratified sample: tag every row with its calendar year, then draw 10% of
# the rows within each year so that every year contributes to the sample.
apple_stock <- apple_stock %>% mutate(Year = year(Date))
stratified_sample <- apple_stock %>%
  group_by(Year) %>%
  sample_frac(0.1) %>%
  # Drop the grouping once sampling is done; otherwise any later dplyr verb
  # applied to this sample would silently operate per year
  ungroup()
summary(stratified_sample$Return)
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -0.1781602 -0.0142297  0.0000000 -0.0001117  0.0142848  0.2370346
# Interactive 30-bin histogram of the returns in the year-stratified sample
p_stratified <- stratified_sample %>%
  ggplot(mapping = aes(x = Return)) +
  geom_histogram(bins = 30, fill = "lightblue", color = "black") +
  theme_minimal() +
  labs(
    title = "Histogram of Stratified Sample Returns",
    x = "Return",
    y = "Frequency"
  )
ggplotly(p = p_stratified)

Data Wrangling:

# Label every trading day with the first day of its month so that rows can
# be aggregated per calendar month
apple_stock <- mutate(apple_stock, Month = floor_date(Date, unit = "month"))

# Mean closing price for each month
monthly_avg <- summarise(
  group_by(apple_stock, Month),
  Avg_Close = mean(Close, na.rm = TRUE)
)

# Interactive line chart of the monthly average close, with one x-axis tick
# per year and slanted labels so they do not overlap
p_monthly <- monthly_avg %>%
  ggplot(mapping = aes(x = Month, y = Avg_Close)) +
  geom_line(color = "purple") +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  labs(
    title = "Monthly Average Closing Prices",
    x = "Month",
    y = "Average Close"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(p = p_monthly)