typecode

R Data Analysis/analysis.R

#' Summarise the numeric columns of a data frame
#'
#' Computes the mean, median, standard deviation and missing-value count of
#' every numeric column. Missing values are ignored by the three statistics.
#'
#' @param df A data frame.
#' @return A data frame with one row per numeric column (row names are the
#'   column names) and the columns `mean`, `median`, `sd` and `missing`.
#'   When `df` has no numeric columns a zero-row data frame with the same
#'   four columns is returned.
summarize_dataset <- function(df) {
  # vapply() always yields a logical vector, even for a zero-column frame,
  # where sapply() would return an empty list and break the subsetting.
  numeric_cols <- vapply(df, is.numeric, logical(1))

  if (!any(numeric_cols)) {
    # rbind-ing an empty list would give NULL; return a typed empty frame.
    return(data.frame(
      mean = numeric(0),
      median = numeric(0),
      sd = numeric(0),
      missing = integer(0)
    ))
  }

  stats <- lapply(df[, numeric_cols, drop = FALSE], function(col) {
    data.frame(
      mean = mean(col, na.rm = TRUE),
      median = median(col, na.rm = TRUE),
      sd = sd(col, na.rm = TRUE),
      missing = sum(is.na(col))
    )
  })

  # The list names (column names) become the row names of the result.
  do.call(rbind, stats)
}

#' Min-max rescale a vector to the interval [0, 1]
#'
#' Missing values are ignored when the range is computed and stay missing in
#' the result.
#'
#' @param x A numeric vector.
#' @return A double vector the same length as `x`. A vector whose non-missing
#'   values are all equal maps to 0 (instead of `NaN` from a division by
#'   zero); an empty or all-`NA` vector is returned as doubles without the
#'   `range()` warnings.
normalize <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0L) {
    # Nothing to scale; `+ 0` only promotes to double, keeping NAs and names.
    return(x + 0)
  }

  rng <- range(observed)
  span <- rng[2] - rng[1]

  # isTRUE() guards against a NaN span (e.g. all values infinite).
  if (isTRUE(span == 0)) {
    return(x - rng[1])
  }

  (x - rng[1]) / span
}

#' Select the rows whose value is an IQR outlier
#'
#' A value is an outlier when it lies below `Q1 - threshold * IQR` or above
#' `Q3 + threshold * IQR`, with quartiles computed on the non-missing values.
#'
#' @param df A data frame.
#' @param col Name (or index) of the column to test, passed to `df[[col]]`.
#' @param threshold Multiplier applied to the inter-quartile range.
#' @return The rows of `df` whose `col` value is an outlier. Rows with a
#'   missing value in `col` are never selected, and the result is always a
#'   data frame, even when `df` has a single column.
detect_outliers <- function(df, col, threshold = 1.5) {
  values <- df[[col]]

  quartiles <- quantile(values, c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  iqr <- quartiles[2] - quartiles[1]
  lower <- quartiles[1] - threshold * iqr
  upper <- quartiles[2] + threshold * iqr

  is_outlier <- values < lower | values > upper

  # which() drops NA comparisons; indexing with a logical NA would otherwise
  # add rows filled entirely with NA to the result.
  df[which(is_outlier), , drop = FALSE]
}

#' Per-group count, mean and total of one column
#'
#' @param df A data frame.
#' @param group_col Name of the grouping column.
#' @param value_col Name of the numeric column to aggregate.
#' @return A data frame with the columns `group`, `count` (group size,
#'   missing values included), `mean` and `total` (both ignoring missing
#'   values), sorted by decreasing `total`. `total` is always returned as
#'   double. An input without any groups yields a zero-row data frame with
#'   the same four columns.
pivot_summary <- function(df, group_col, value_col) {
  groups <- split(df[[value_col]], df[[group_col]])

  # vapply() keeps every column a typed vector when there are no groups;
  # sapply() would return empty lists and the columns would disappear.
  result <- data.frame(
    group = as.character(names(groups)),
    count = vapply(groups, length, integer(1)),
    mean = vapply(groups, mean, numeric(1), na.rm = TRUE),
    total = vapply(groups, sum, numeric(1), na.rm = TRUE),
    row.names = NULL
  )

  result[order(-result$total), ]
}

#' Correlation matrix of the numeric columns
#'
#' @param df A data frame.
#' @return A square matrix of correlations between the numeric columns of
#'   `df`, computed on pairwise-complete observations and rounded to three
#'   decimals.
correlation_matrix <- function(df) {
  is_num <- vapply(df, is.numeric, logical(1))

  # drop = FALSE keeps a data frame when only one column is numeric;
  # otherwise cor() receives a bare vector and fails.
  numeric_df <- df[, is_num, drop = FALSE]

  cor_mat <- cor(numeric_df, use = "pairwise.complete.obs")
  round(cor_mat, 3)
}

#' Bootstrap estimate of the mean with a 95% percentile interval
#'
#' Resamples the non-missing values of `x` with replacement `n_boot` times
#' and summarises the resulting means.
#'
#' @param x A numeric vector; missing values are dropped before resampling.
#' @param n_boot Number of bootstrap resamples.
#' @return A list with `estimate` (mean of the bootstrap means), `ci_lower`
#'   and `ci_upper` (the 2.5% and 97.5% quantiles of the bootstrap means).
bootstrap_mean <- function(x, n_boot = 1000) {
  x <- x[!is.na(x)]
  n <- length(x)
  if (n == 0L) {
    stop("`x` must contain at least one non-missing value", call. = FALSE)
  }

  means <- vapply(
    seq_len(n_boot),
    function(i) {
      # Resample indices rather than values: sample(x, ...) treats a single
      # number x as the range 1:x and would draw from the wrong population.
      mean(x[sample.int(n, n, replace = TRUE)])
    },
    numeric(1)
  )

  list(
    estimate = mean(means),
    ci_lower = quantile(means, 0.025),
    ci_upper = quantile(means, 0.975)
  )
}

0 WPM

100% Accuracy

00:00 Time

Progress