typecode
RData Analysis/analysis.R
#' Summarize the numeric columns of a data frame
#'
#' Computes the mean, median, standard deviation and number of missing
#' values for every numeric column. Missing values are ignored when
#' computing the statistics.
#'
#' @param df A data frame.
#' @return A data frame with one row per numeric column of `df` (row names
#'   are the column names) and the columns `mean`, `median`, `sd` and
#'   `missing`. When `df` has no numeric columns, a zero-row data frame
#'   with those same four columns is returned.
summarize_dataset <- function(df) {
  # vapply() always yields a logical vector, even for a zero-column input.
  numeric_cols <- vapply(df, is.numeric, logical(1))

  # Without this guard do.call(rbind, list()) would return NULL.
  if (!any(numeric_cols)) {
    return(data.frame(
      mean = numeric(0),
      median = numeric(0),
      sd = numeric(0),
      missing = integer(0)
    ))
  }

  stats <- lapply(df[, numeric_cols, drop = FALSE], function(col) {
    list(
      mean = mean(col, na.rm = TRUE),
      median = median(col, na.rm = TRUE),
      sd = sd(col, na.rm = TRUE),
      missing = sum(is.na(col))
    )
  })

  # One single-row data frame per column, stacked; the list names become
  # the row names of the result.
  do.call(rbind, lapply(stats, as.data.frame))
}
13
#' Rescale a vector to the interval [0, 1] (min-max normalization)
#'
#' Missing values are ignored when determining the range and are kept as
#' `NA` in the result.
#'
#' @param x A numeric vector.
#' @return A double vector the same length as `x`. If all non-missing
#'   values of `x` are equal, those positions are `0` rather than `NaN`.
#'   If `x` is empty or entirely missing, the result is all `NA` (or
#'   empty).
normalize <- function(x) {
  # range() warns and returns c(Inf, -Inf) when there is nothing to
  # measure; short-circuit so the caller gets plain NAs without warnings.
  if (all(is.na(x))) {
    return(x + NA_real_)
  }

  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]

  # Constant input: dividing by a zero span would give NaN everywhere.
  # 0 * x yields double zeros while leaving NA positions as NA.
  # isTRUE() protects against a NaN span (e.g. all-Inf input).
  if (isTRUE(span == 0)) {
    return(0 * x)
  }

  (x - rng[1]) / span
}
18
#' Return the rows of a data frame whose value is an IQR outlier
#'
#' A value is an outlier when it lies below `Q1 - threshold * IQR` or
#' above `Q3 + threshold * IQR`, where the quartiles are computed from the
#' non-missing values of the column.
#'
#' @param df A data frame.
#' @param col Name (or index) of the column to test, as used by `df[[col]]`.
#' @param threshold Multiplier applied to the inter-quartile range.
#' @return A data frame containing only the outlier rows of `df`. Rows
#'   whose value in `col` is missing are never reported as outliers.
detect_outliers <- function(df, col, threshold = 1.5) {
  values <- df[[col]]

  quartiles <- quantile(values, c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  iqr <- quartiles[2] - quartiles[1]
  lower <- quartiles[1] - threshold * iqr
  upper <- quartiles[2] + threshold * iqr

  is_outlier <- values < lower | values > upper

  # which() drops NA comparisons; indexing with a logical vector that
  # contains NA would insert all-NA rows. drop = FALSE keeps a data frame
  # even when `df` has a single column.
  df[which(is_outlier), , drop = FALSE]
}
28
#' Per-group count, mean and total of a value column
#'
#' @param df A data frame.
#' @param group_col Name of the grouping column, as used by `df[[group_col]]`.
#' @param value_col Name of the numeric column to aggregate.
#' @return A data frame with the columns `group` (character), `count`
#'   (number of rows in the group, missing values included), `mean` and
#'   `total` (both computed with missing values removed; `total` is always
#'   double), sorted by `total` in decreasing order. An input without rows
#'   yields a zero-row data frame with the same columns.
pivot_summary <- function(df, group_col, value_col) {
  groups <- split(df[[value_col]], df[[group_col]])

  # vapply() fixes the element type, so an empty `groups` produces
  # zero-length vectors instead of the list() that sapply() would return.
  result <- data.frame(
    group = as.character(names(groups)),
    count = vapply(groups, length, integer(1)),
    mean = vapply(groups, mean, numeric(1), na.rm = TRUE),
    total = vapply(groups, sum, numeric(1), na.rm = TRUE),
    row.names = NULL
  )

  result[order(-result$total), ]
}
40
#' Pairwise correlation matrix of the numeric columns
#'
#' @param df A data frame.
#' @return A square matrix of correlations between the numeric columns of
#'   `df`, computed on pairwise-complete observations and rounded to three
#'   decimal places.
correlation_matrix <- function(df) {
  numeric_cols <- vapply(df, is.numeric, logical(1))

  # drop = FALSE keeps a data frame when only one column is numeric;
  # otherwise the selection collapses to a vector and cor() fails.
  numeric_df <- df[, numeric_cols, drop = FALSE]

  cor_mat <- cor(numeric_df, use = "pairwise.complete.obs")
  round(cor_mat, 3)
}
46
#' Bootstrap estimate of the mean with a 95% percentile interval
#'
#' Draws `n_boot` resamples (with replacement, same size as the data) and
#' summarizes the distribution of their means. Missing values are removed
#' from `x` before resampling. Results depend on the random number
#' generator state; call `set.seed()` beforehand for reproducibility.
#'
#' @param x A numeric vector.
#' @param n_boot Number of bootstrap resamples.
#' @return A list with `estimate` (mean of the bootstrap means) and
#'   `ci_lower` / `ci_upper` (the 2.5% and 97.5% quantiles of the
#'   bootstrap means).
bootstrap_mean <- function(x, n_boot = 1000) {
  x <- x[!is.na(x)]
  n <- length(x)
  if (n == 0L) {
    stop("`x` must contain at least one non-missing value.", call. = FALSE)
  }

  # Resample by index: sample(x, ...) treats a single number x >= 1 as
  # "sample from 1:x", which would silently bootstrap the wrong data.
  means <- vapply(
    seq_len(n_boot),
    function(i) mean(x[sample.int(n, n, replace = TRUE)]),
    numeric(1)
  )

  list(
    estimate = mean(means),
    ci_lower = quantile(means, 0.025),
    ci_upper = quantile(means, 0.975)
  )
}
0WPM
100%Accuracy
00:00Time
0%
Progress