home *** CD-ROM | disk | FTP | other *** search
- bartlett.test <- function(x, g)
- {
- if (is.list(x)) {
- if (length(x) < 2)
- stop("x must be a list with at least 2 elements")
- DNAME <- deparse(substitute(x))
- x <- lapply(x, function(x) x <- x[is.finite(x)])
- k <- length(x)
- }
- else {
- if (length(x) != length(g))
- stop("x and g must have the same length")
- DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(g)))
- OK <- complete.cases(x, g)
- x <- x[OK]
- g <- as.factor(g[OK])
- k <- nlevels(g)
- if (k < 2)
- stop("all observations are in the same group")
- x <- split(x, g)
- }
-
- n <- sapply(x, "length") - 1
- if (any(n <= 0))
- stop("there must be at least 2 observations in each group")
- v <- sapply(x, "var")
- n.total <- sum(n)
- v.total <- sum(n * v) / n.total
- STATISTIC <- ((n.total * log(v.total) - sum(n * log(v))) /
- (1 + (sum(1 / n) - 1 / n.total) / (3 * (k - 1))))
- names(STATISTIC) <- "Bartlett's K-square"
- PARAMETER <- k - 1
- names(PARAMETER) <- "df"
-
- RVAL <- list(statistic = STATISTIC,
- parameter = PARAMETER,
- p.value = 1 - pchisq(STATISTIC, PARAMETER),
- data.name = DNAME,
- method = "Bartlett test for homogeneity of variances")
- class(RVAL) <- "htest"
- return(RVAL)
- }
- binom.test <- function(x, n, p = 0.5, alternative = "two.sided")
- {
- if ((length(n) > 1) || is.na(n) || (n < 1) || (n != round(n)))
- stop("n must be a positive integer")
- if ((length(x) > 1) || is.na(x) || (x < 0) || (x > n) || (x != round(x)))
- stop("x must be an integer between 0 and n")
- if (!missing(p) && (length(p) > 1 || is.na(p) || p < 0 || p > 1))
- stop ("p must be a single number between 0 and 1")
-
- CHOICES <- c("two.sided", "less", "greater")
- alternative <- CHOICES[pmatch(alternative, CHOICES)]
- if (length(alternative) > 1 || is.na(alternative))
- stop ("alternative must be \"two.sided\", \"less\" or \"greater\"")
-
- DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(n)))
- if (alternative == "less")
- PVAL <- pbinom(x, n, p)
- else if (alternative == "greater")
- PVAL <- 1 - pbinom(x - 1, n, p)
- else {
- eps <- 10^(-6)
- if (x/n < p) {
- PVAL <- pbinom(x, n, p)
- v <- 1 - pbinom(0:n, n, p)
- }
- else {
- PVAL <- 1 - pbinom(x - 1, n, p)
- v <- pbinom(0:n, n, p)
- }
- PVAL <- min(1, PVAL + max(v[v <= (1 + eps) * PVAL]))
- }
-
- names(x) <- "number of successes" # or simply "x" ??
- names(n) <- "number of trials" # or simply "n" ??
- names(p) <- "probability of success" # or simply "p" ??
-
- RVAL <- list(statistic = x,
- parameter = n,
- p.value = PVAL,
- null.value = p,
- alternative = alternative,
- method = "Exact binomial test",
- data.name = DNAME)
- class(RVAL) <- "htest"
- return(RVAL)
- }
- chisq.test <- function(x, y = NULL, correct = TRUE,
- p = rep(1 / length(x), length(x)))
- {
- DNAME <- deparse(substitute(x))
- if (is.matrix(x)) {
- if (min(dim(x)) == 1)
- x <- as.vector(x)
- }
- if (!is.matrix(x) && !is.null(y)) {
- if (length(x) != length(y))
- stop("x and y must have the same length")
- DNAME <- paste(DNAME, "and", deparse(substitute(y)))
- OK <- complete.cases(x, y)
- x <- as.factor(x[OK])
- y <- as.factor(y[OK])
- if ((nlevels(x) < 2) || (nlevels(y) < 2))
- stop("x and y must have at least 2 levels")
- x <- table(x, y)
- }
-
- if (any(x < 0) || any(is.na(x)))
- stop("all entries of x must be nonnegative and finite")
-
- if (is.matrix(x)) {
- METHOD <- "Pearson's Chi-square test"
- E <- outer(apply(x, 1, sum), apply(x, 2, sum), "*") / sum(x)
- if (correct && nrow(x) == 2 && ncol(x) == 2) {
- YATES <- .5
- METHOD <- paste(METHOD, "with Yates' continuity correction")
- }
- else
- YATES <- 0
- dimnames(E) <- dimnames(x)
- STATISTIC <- sum((abs(x - E) - YATES)^2 / E)
- PARAMETER <- (nrow(x) - 1) * (ncol(x) - 1)
- }
- else {
- if (length(x) == 1)
- stop("x must at least have 2 elements")
- if (length(x) != length(p))
- stop("x and p must have the same number of elements")
- METHOD <- "Chi-square test for given probabilities"
- E <- sum(x) * p
- names(E) <- names(x)
- STATISTIC <- sum((x - E) ^ 2 / E)
- PARAMETER <- length(x) - 1
- }
-
- names(STATISTIC) <- "X-squared"
- names(PARAMETER) <- "df"
- if (any(E < 5))
- warning("Chi-square approximation may be incorrect")
- PVAL <- 1 - pchisq(STATISTIC, PARAMETER)
- RVAL <- list(statistic = STATISTIC,
- parameter = PARAMETER,
- p.value = PVAL,
- method = METHOD,
- data.name = DNAME,
- observed = x,
- expected = E)
- class(RVAL) <- "htest"
- return(RVAL)
- }
- cor.test <- function(x, y, alternative = "two.sided", method = "pearson")
- {
- CHOICES <- c("two.sided", "less", "greater")
- alternative <- CHOICES[pmatch(alternative, CHOICES)]
- if (length(alternative) > 1 || is.na(alternative))
- stop("alternative must be \"two.sided\", \"less\" or \"greater\"")
-
- CHOICES <- c("pearson", "kendall", "spearman")
- method <- CHOICES[pmatch(method, CHOICES)]
- if (length(method) > 1 || is.na(method))
- stop("method must be \"pearson\", \"kendall\" or \"spearman\"")
-
- DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
-
- if (length (x) != length (y))
- stop ("x and y must have the same length")
- OK <- complete.cases(x, y)
- x <- x[OK]
- n <- length(x)
- if (n < 3)
- stop("not enough finite observations")
- else
- y <- y[OK]
-
- NVAL <- 0
-
- if (method == "pearson") {
- method <- "Pearson's product-moment correlation"
- names(NVAL) <- "correlation"
- r <- cor(x, y)
- ESTIMATE <- r
- names(ESTIMATE) <- "cor"
- PARAMETER <- n - 2
- names(PARAMETER) <- "df"
- STATISTIC <- sqrt(PARAMETER) * r / sqrt(1 - r^2)
- names(STATISTIC) <- "t"
- PVAL <- pt(STATISTIC, PARAMETER)
- }
- else {
- if (method == "kendall") {
- method <- "Kendall's rank correlation tau"
- names(NVAL) <- "tau"
- x <- rank(x)
- y <- rank(y)
- ESTIMATE <- cor (c(sign(outer(x, x, "-"))),
- c(sign(outer(y, y, "-"))))
- names(ESTIMATE) <- "tau"
- STATISTIC <- ESTIMATE / sqrt ((4*n + 10) / (9 * n * (n-1)))
- }
- else {
- method <- "Spearman's rank correlation rho"
- names(NVAL) <- "rho"
- ESTIMATE <- cor (rank(x), rank(y))
- names(ESTIMATE) <- "rho"
- STATISTIC <- sqrt (n-1) * (ESTIMATE - 6 / (n^3 - n))
- }
- PARAMETER <- NULL
- names(STATISTIC) <- "z"
- PVAL <- pnorm(STATISTIC)
- }
-
- if (alternative == "two.sided")
- PVAL <- 2 * min (PVAL, 1 - PVAL)
- else if (alternative == "greater")
- PVAL <- 1 - PVAL
-
- RVAL <- list(statistic = STATISTIC,
- parameter = PARAMETER,
- p.value = PVAL,
- estimate = ESTIMATE,
- null.value = NVAL,
- alternative = alternative,
- method = method,
- data.name = DNAME)
- class(RVAL) <- "htest"
- return(RVAL)
- }
- fisher.test <- function(x, y = NULL, alternative = "two.sided")
- {
- DNAME <- deparse(substitute(x))
- if (is.matrix(x)) {
- if (any(dim(x) < 2))
- stop("x must have at least 2 rows and columns")
- if (any(x < 0) || any(is.na(x)))
- stop("all entries of x must be nonnegative and finite")
- }
- else {
- if (is.null(y))
- stop("if x is not a matrix, y must be given")
- if (length(x) != length(y))
- stop("x and y must have the same length")
- DNAME <- paste(DNAME, "and", deparse(substitute(y)))
- OK <- complete.cases(x, y)
- x <- as.factor(x[OK])
- y <- as.factor(y[OK])
- if ((nlevels(x) < 2) || (nlevels(y) < 2))
- stop("x and y must have at least 2 levels")
- x <- table(x, y)
- }
-
- if (any(dim(x) != c(2, 2)))
- stop("Sorry, only 2 by 2 tables are currently implemented")
-
- CHOICES <- c("two.sided", "less", "greater")
- alternative <- CHOICES[pmatch(alternative, CHOICES)]
- if (length(alternative) > 1 || is.na(alternative))
- stop("alternative must be \"two.sided\", \"less\" or \"greater\"")
-
- m <- sum(x[, 1])
- n <- sum(x[, 2])
- k <- sum(x[1, ])
- x <- x[1, 1]
- PVAL <- switch(alternative,
- less = phyper(x, m, n, k),
- greater = 1 - phyper(x - 1, m, n, k),
- two.sided = {
- eps <- 10^(-6)
- if ((PVAL <- phyper(x, m, n, k)) < .5)
- v <- phyper(0:k, m, n, k)
- else {
- v <- 1 - phyper(0:k, m, n, k)
- PVAL <- 1 - phyper(x - 1, m, n, k)
- }
- min(1, PVAL + max(v[v <= (1 + eps) * PVAL]))
- })
- RVAL <- list(p.value = PVAL,
- alternative = alternative,
- method = "Fisher's Exact Test for Count Data",
- data.name = DNAME)
- class(RVAL) <- "htest"
- return(RVAL)
- }
- friedman.test <- function(y, groups, blocks)
- {
- DNAME <- deparse(substitute(y))
- if (is.matrix(y)) {
- groups <- as.factor(c(col(y)))
- blocks <- as.factor(c(row(y)))
- }
- else {
- if (any(is.na(c(groups, blocks))))
- stop("NA's are not allowed in groups or blocks")
- if (any(diff(c(length(y), length(groups), length(blocks)))))
- stop("y, groups and blocks must have the same length")
- DNAME <- paste(DNAME, ", ", deparse(substitute(groups)), " and ",
- deparse(substitute(blocks)), sep = "")
- if (any(table(groups, blocks) != 1))
- stop("Not an unreplicated complete block design")
- groups <- as.factor(groups)
- blocks <- as.factor(blocks)
- }
-
- k <- nlevels(groups)
- y <- matrix(unlist(split(y, blocks)), ncol = k, byrow = T)
- y <- y[complete.cases(y), ]
- n <- nrow(y)
- r <- t(apply(y, 1, rank))
- TIES <- apply(r, 1, table)
- STATISTIC <- ((12 * sum((apply(r, 2, sum) - n * (k + 1) / 2)^2)) /
- (n * k * (k + 1)
- - (sum(unlist(lapply(TIES, function (u) {u^3 - u}))) /
- (k - 1))))
- PARAMETER <- k - 1
- names(STATISTIC) <- "Friedman chi-square"
- names(PARAMETER) <- "df"
- RVAL <- list(statistic = STATISTIC,
- parameter = PARAMETER,
- p.value = 1 - pchisq(STATISTIC, PARAMETER),
- method = "Friedman rank sum test",
- data.name = DNAME)
- class(RVAL) <- "htest"
- return(RVAL)
- }
- kruskal.test <- function(x, g)
- {
- if (is.list(x)) {
- if (length(x) < 2)
- stop("x must be a list with at least 2 elements")
- DNAME <- deparse(substitute(x))
- x <- lapply(x, function(x) x <- x[is.finite(x)])
- k <- length(x)
- g <- as.factor(rep(1 : k, sapply(x, "length")))
- x <- unlist(x)
- }
- else {
- if (length(x) != length(g))
- stop("x and g must have the same length")
- DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(g)))
- if (!all(is.finite(g)))
- stop("all group levels must be finite")
- OK <- !is.na(x)
- x <- x[OK]
- g <- as.factor(g[OK])
- k <- nlevels(g)
- if (k < 2)
- stop("all observations are in the same group")
- }
-
- n <- length(x)
- if (n < 2)
- stop("not enough observations")
- r <- rank(x)
- TIES <- table(x)
- STATISTIC <- sum(tapply(r, g, "sum")^2 / tapply(r, g, "length"))
- STATISTIC <- ((12 * STATISTIC / (n * (n + 1)) - 3 * (n + 1)) /
- (1 - sum(TIES^3 - TIES) / (n^3 - n)))
- names(STATISTIC) <- "Kruskal-Wallis chi-square"
- PARAMETER <- k - 1
- names(PARAMETER) <- "df"
-
- RVAL <- list(statistic = STATISTIC,
- parameter = PARAMETER,
- p.value = 1 - pchisq(STATISTIC, PARAMETER),
- method = "Kruskal-Wallis rank sum test",
- data.name = DNAME)
- class(RVAL) <- "htest"
- return(RVAL)
- }
- ks.test <- function(x, y, ..., alternative = "two.sided")
- {
- CHOICES <- c("two.sided", "less", "greater")
- alternative <- CHOICES[pmatch(alternative, CHOICES)]
- if (length(alternative) > 1 || is.na(alternative))
- stop("alternative must be \"two.sided\", \"less\" or \"greater\"")
-
- DNAME <- deparse(substitute(x))
- x <- x[!is.na(x)]
- n <- length(x)
- if (n < 1)
- stop("Not enough x data")
-
- if (is.numeric(y)) {
- DNAME <- paste(DNAME, "and", deparse(substitute(y)))
- y <- y[!is.na(y)]
- n.x <- n
- n.y <- length(y)
- if (n.y < 1)
- stop("Not enough y data")
- METHOD <- "Two-sample Kolmogorov-Smirnov test"
- n <- n.x * n.y / (n.x + n.y)
- z <- ifelse (order(c(x, y)) <= n.x, 1/n.x, -1/n.y)
- STATISTIC <- switch(alternative,
- "two.sided" = max(abs(cumsum(z))),
- "greater" = max(cumsum(z)),
- "less" = - min(cumsum(z)))
- }
- else {
- if (is.character(y))
- y <- get(y, mode="function")
- if (mode(y) != "function")
- stop("y must be numeric or a string naming a valid function")
- METHOD <- "One-sample Kolmogorov-Smirnov test"
- n <- length(x)
- x <- y(sort(x), ...) - (0:(n-1))/n
- STATISTIC <- switch(alternative,
- "two.sided" = max(abs(c(x, x-1/n))),
- "greater" = max(c(x, x-1/n)),
- "less" = - min(c(x, x-1/n)))
- }
-
- names(STATISTIC) <- switch(alternative,
- "two.sided" = "D",
- "greater" = "D^+",
- "less" = "D^-")
- PVAL <- ifelse(alternative == "two.sided",
- 1 - pks(sqrt(n) * STATISTIC),
- exp(- 2 * n * STATISTIC^2))
-
- RVAL <- list(statistic = STATISTIC,
- p.value = PVAL,
- alternative = alternative,
- method = METHOD,
- data.name = DNAME)
- class(RVAL) <- "htest"
- return(RVAL)
- }
-
-
- mantelhaen.test <- function(x, y = NULL, z = NULL, correct = TRUE)
- {
- DNAME <- deparse(substitute(x))
- if (is.array(x)) {
- if (length(dim(x)) == 3) {
- if (any(is.na(x)))
- stop("NAs are not allowed")
- if (dim(x)[1:2] != c(2, 2))
- stop("table for each stratum must be 2 by 2")
- }
- else
- stop("x must be a 3-dimensional array")
- }
- else {
- if (is.null(y))
- stop("If x is not an array, y must be given")
- if (is.null(z))
- stop("If x is not an array, z must be given")
- if (any(diff(c(length(x), length(y), length(z)))))
- stop("x, y, and z must have the same length")
- DNAME <- paste(DNAME, "and", deparse(substitute(y)), "and",
- deparse(substitute(z)))
- OK <- complete.cases(x, y, z)
- x <- as.factor(x[OK])
- y <- as.factor(y[OK])
- if ((nlevels(x) != 2) || (nlevels(y) != 2))
- stop("x and y must be dichotomous")
- else
- x <- table(x, y, z[OK])
- }
-
- s.x <- apply(x, c(1, 3), sum)
- s.y <- apply(x, c(2, 3), sum)
- n <- apply(x, 3, sum)
- if (any(n < 2))
- stop("sample size in each stratum must be > 1")
- DELTA <- abs(sum(x[1, 1, ] - s.x[1, ] * s.y[1, ] / n))
- YATES <- ifelse(correct && (DELTA >= .5), .5, 0)
- STATISTIC <- ((DELTA - YATES)^2 /
- sum(apply(rbind(s.x, s.y), 2, prod) / (n^2 * (n - 1))))
- PARAMETER <- 1
- names(STATISTIC) <- "Mantel-Haenszel X-square"
- names(PARAMETER) <- "df"
-
- RVAL <- list(statistic = STATISTIC,
- parameter = PARAMETER,
- p.value = 1 - pchisq(STATISTIC, PARAMETER),
- method = paste("Mantel-Haenszel chi-square test",
- ifelse(YATES, "with", "without"),
- "continuity correction"),
- data.name = DNAME)
- class(RVAL) <- "htest"
- return(RVAL)
- }
- mcnemar.test <- function(x, y = NULL, correct = TRUE)
- {
- if (is.matrix(x)) {
- r <- nrow(x)
- if ((r < 2) || (ncol (x) != r))
- stop("x must be square with at least two rows and columns")
- if (any(x < 0) || any(is.na(x)))
- stop("all entries of x must be nonnegative and finite")
- DNAME <- deparse(substitute(x))
- }
- else {
- if (is.null(y))
- stop("if x is not a matrix, y must be given")
- if (length(x) != length(y))
- stop("x and y must have the same length")
- DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
- OK <- complete.cases(x, y)
- x <- as.factor(x[OK])
- y <- as.factor(y[OK])
- r <- nlevels(x)
- if ((r < 2) || (nlevels(y) != r))
- stop("x and y must have the same number of levels (minimum 2)")
- x <- table(x, y)
- }
-
- PARAMETER <- r * (r-1) / 2
- names(PARAMETER) <- "df"
- METHOD <- "McNemar's Chi-square test"
-
- if (correct && (r == 2) && any(x - t(x))) {
- y <- (abs(x - t(x)) - 1)
- METHOD <- paste(METHOD, "with continuity correction")
- }
- else
- y <- x - t(x)
- x <- x + t(x)
-
- STATISTIC <- sum(y[upper.tri(x)]^2 / x[upper.tri(x)])
- names(STATISTIC) <- "McNemar's chi-square"
- PVAL <- 1 - pchisq(STATISTIC, PARAMETER)
-
- RVAL <- list(statistic = STATISTIC,
- parameter = PARAMETER,
- p.value = PVAL,
- method = METHOD,
- data.name = DNAME)
- class(RVAL) <- "htest"
- return(RVAL)
- }
- pks <- function(x, tol=10^(-6))
- {
- if (is.numeric(x))
- x <- as.vector(x)
- else
- stop("Argument x must be numeric")
-
- PKS <- rep(0, length(x))
- PKS[is.na(x)] <- NA
- IND <- which(!is.na(x) & (x > 0))
- if (length(IND) > 0) {
- k <- 1 : ceiling(sqrt(-log(tol)/2) / min(x[IND]))
- y <- outer(x[IND]^2, k,
- function (t, k) { (-1)^k * exp(-2 * t * k^2) })
- PKS[IND] <- 1 + 2 * apply(y, 1, "sum")
- }
- return(PKS)
- }
- prop.test <- function(x, n, p = NULL, alternative = "two.sided",
- conf.level = 0.95, correct = TRUE)
- {
- DNAME <- paste(deparse(substitute(x)), "out of", deparse(substitute(n)))
-
- if ((l <- length(x)) != length(n))
- stop("x and n must have the same length")
- OK <- complete.cases(x, n)
- x <- x[OK]
- n <- n[OK]
- if ((k <- length(x)) < 1)
- stop("Not enough data")
- if (any(n <= 0))
- stop("Elements of n must be positive")
- if (any(x < 0))
- stop("Elements of x must be nonnegative")
- if (any(x > n))
- stop("Elements of x must not be greater than those of n")
-
- if (is.null(p) && (k == 1))
- p <- .5
- if (!is.null(p)) {
- DNAME <- paste(DNAME, ", null ",
- ifelse(k == 1, "probability ", "probabilities "),
- deparse(substitute(p)), sep = "")
- if (length(p) != l)
- stop("p must have the same length as x and n")
- p <- p[OK]
- if (any((p <= 0) | (p >= 1)))
- stop("Elements of p must be in (0,1)")
- }
-
- CHOICES <- c("two.sided", "less", "greater")
- alternative <- CHOICES[pmatch(alternative, CHOICES)]
- if (length(alternative) > 1 || is.na(alternative))
- stop("alternative must be \"two.sided\", \"less\" or \"greater\"")
- if ((k > 2) || (k == 2) && !is.null(p))
- alternative <- "two.sided"
-
- if ((length(conf.level) != 1) || is.na(conf.level) ||
- (conf.level <= 0) || (conf.level >= 1))
- stop("conf.level must be a single number between 0 and 1")
-
- correct <- as.logical(correct)
-
- ESTIMATE <- x/n
- names(ESTIMATE) <- if (k == 1) "p" else paste("prop", 1:l)[OK]
- NVAL <- p
- CINT <- NULL
- YATES <- ifelse(correct && (k <= 2), .5, 0)
-
- if (k == 1) {
- z <- ifelse(alternative == "two.sided",
- qnorm((1 + conf.level) / 2),
- qnorm(conf.level))
- YATES <- min(YATES, abs(x - n * p))
- p.c <- ESTIMATE + YATES / n
- p.u <- ((p.c + z^2 / (2 * n)
- + z * sqrt(p.c * (1 - p.c) / n + z^2 / (4 * n^2)))
- / (1 + z^2 / n))
- p.c <- ESTIMATE - YATES / n
- p.l <- ((p.c + z^2 / (2 * n)
- - z * sqrt(p.c * (1 - p.c) / n + z^2 / (4 * n^2)))
- / (1 + z^2 / n))
- CINT <- switch(alternative,
- "two.sided" = c(max(p.l, 0), min(p.u, 1)),
- "greater" = c(max(p.l, 0), 1),
- "less" = c(0, min(p.u, 1)))
- }
- else if ((k == 2) & is.null(p)) {
- DELTA <- ESTIMATE[1] - ESTIMATE[2]
- YATES <- min(YATES, abs(DELTA) / sum(1/n))
- WIDTH <- (switch(alternative,
- "two.sided" = qnorm((1 + conf.level) / 2),
- qnorm(conf.level))
- * sqrt(sum(ESTIMATE * (1 - ESTIMATE) / n))
- + YATES * sum(1/n))
- CINT <- switch(alternative,
- "two.sided" = c(max(DELTA - WIDTH, -1),
- min(DELTA + WIDTH, 1)),
- "greater" = c(max(DELTA - WIDTH, -1), 1),
- "less" = c(-1, min(DELTA + WIDTH, 1)))
- }
- if (!is.null(CINT))
- attr(CINT, "conf.level") <- conf.level
-
- METHOD <- paste(ifelse(k == 1,
- "1-sample proportions test",
- paste(k, "-sample test for ",
- ifelse(is.null(p), "equality of", "given"),
- " proportions", sep = "")),
- ifelse(YATES, "with", "without"),
- "continuity correction")
-
- if (is.null(p)) {
- p <- sum(x)/sum(n)
- PARAMETER <- k - 1
- }
- else {
- PARAMETER <- k
- names(NVAL) <- names(ESTIMATE)
- }
- names(PARAMETER) <- "df"
-
- x <- cbind(x, n - x)
- E <- cbind(n * p, n * (1 - p))
- if (any(E < 5))
- warning("Chi-square approximation may be incorrect")
- STATISTIC <- sum((abs(x - E) - YATES)^2 / E)
- names(STATISTIC) <- "X-squared"
-
- if (alternative == "two.sided")
- PVAL <- 1 - pchisq(STATISTIC, PARAMETER)
- else {
- if (k == 1)
- z <- sign(ESTIMATE - p) * sqrt(STATISTIC)
- else
- z <- sign(DELTA) * sqrt(STATISTIC)
- if (alternative == "greater")
- PVAL <- 1 - pnorm(z)
- else
- PVAL <- pnorm(z)
- }
-
- RVAL <- list(statistic = STATISTIC,
- parameter = PARAMETER,
- p.value = PVAL,
- estimate = ESTIMATE,
- null.value = NVAL,
- conf.int = CINT,
- alternative = alternative,
- method = METHOD,
- data.name = DNAME)
- class(RVAL) <- "htest"
- return(RVAL)
- }
- var.test <- function(x, y, ratio = 1, alternative = "two.sided",
- conf.level = 0.95)
- {
- if (!((length(ratio) == 1) && is.finite(ratio) && (ratio > 0)))
- stop("ratio must be a single positive number")
-
- alternative <- char.expand(alternative,
- c("two.sided", "less", "greater"))
- if ((length(alternative) > 1) || is.na(alternative))
- stop("alternative must be \"two.sided\", \"less\" or \"greater\"")
-
- if (!((length(conf.level) == 1) && is.finite(conf.level) &&
- (conf.level > 0) && (conf.level < 1)))
- stop("conf.level must be a single number between 0 and 1")
-
- DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
-
- x <- x[is.finite(x)]
- DF.x <- length(x) - 1
- if (DF.x < 1)
- stop("not enough x observations")
- y <- y[is.finite(y)]
- DF.y <- length(y) - 1
- if (DF.y < 1)
- stop("not enough y observations")
-
- V.x <- var(x)
- V.y <- var(y)
- ESTIMATE <- V.x / V.y
- STATISTIC <- ESTIMATE / ratio
- PARAMETER <- c(DF.x, DF.y)
-
- PVAL <- pf(STATISTIC, DF.x, DF.y)
- if (alternative == "two.sided") {
- PVAL <- 2 * min(PVAL, 1 - PVAL)
- BETA <- (1 - conf.level) / 2
- CINT <- c(ESTIMATE / qf(1 - BETA, DF.x, DF.y),
- ESTIMATE / qf(BETA, DF.x, DF.y))
- }
- else if (alternative == "greater") {
- PVAL <- 1 - PVAL
- CINT <- c(ESTIMATE / qf(conf.level, DF.x, DF.y), NA)
- }
- else
- CINT <- c(0, ESTIMATE / qf(1 - conf.level, DF.x, DF.y))
- names(STATISTIC) <- "F"
- names(PARAMETER) <- c("num df", "denom df")
- names(ESTIMATE) <- names(ratio) <- "ratio of variances"
- attr(CINT, "conf.level") <- conf.level
- RVAL <- list(statistic = STATISTIC,
- parameter = PARAMETER,
- p.value = PVAL,
- conf.int = CINT,
- estimate = ESTIMATE,
- null.value = ratio,
- alternative = alternative,
- method = "F test to compare two variances",
- data.name = DNAME)
- attr(RVAL, "class") <- "htest"
- return(RVAL)
- }
- wilcox.test <- function(x, y = NULL, alternative = "two.sided", mu = 0,
- paired = FALSE, exact = FALSE, correct = TRUE)
- {
- CHOICES <- c("two.sided", "less", "greater")
- alternative <- CHOICES[pmatch(alternative, CHOICES)]
- if (length(alternative) > 1 || is.na(alternative))
- stop("alternative must be \"two.sided\", \"less\" or \"greater\"")
-
- if (!missing(mu) && ((length(mu) > 1) || !is.finite(mu)))
- stop("mu must be a single number")
-
- if (!is.null(y)) {
- DNAME <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
- if (paired) {
- if (length(x) != length(y))
- stop("x and y must have the same length")
- OK <- complete.cases(x, y)
- x <- x[OK] - y[OK]
- y <- NULL
- }
- else {
- x <- x[is.finite(x)]
- y <- y[is.finite(y)]
- }
- }
- else {
- DNAME <- deparse(substitute(x))
- if (paired)
- stop("y missing for paired test")
- x <- x[is.finite(x)]
- }
-
- if (length(x) < 1)
- stop("not enough x observations")
-
- if (exact)
- warning("exact Wilcoxon tests are currently not implemented\n")
-
- PARAMETER <- NULL
-
- CORRECTION <- 0
-
- if (is.null(y)) {
- METHOD <- "Wilcoxon signed rank test"
- x <- x - mu
- r <- rank(abs(x))
- STATISTIC <- sum(r[x > 0])
- names(STATISTIC) <- "V"
- n <- length(x)
- TIES <- table(r)
- z <- STATISTIC - n*(n+1)/4
- SIGMA <- sqrt(n*(n+1)*(2*n+1)/24 - sum(TIES^3-TIES)/48)
- }
- else {
- if (length(y) < 1)
- stop("not enough y observations")
- METHOD <- "Wilcoxon rank sum test"
- r <- rank(c(x - mu, y))
- STATISTIC <- sum(r[seq(along = x)])
- names(STATISTIC) <- "W"
- n.x <- length(x)
- n.y <- length(y)
- TIES <- table(r)
- z <- STATISTIC - n.x*(n.x+n.y+1)/2
- SIGMA <- sqrt((n.x*n.y/12) *
- ((n.x+n.y+1)
- - sum(TIES^3-TIES) / ((n.x+n.y)*(n.x+n.y+1))))
- }
-
- if (correct) {
- CORRECTION <- switch ("two.sided", sign(z) * 0.5,
- "greater", 0.5,
- "less", -0.5)
- METHOD <- paste(METHOD, "with continuity correction")
- }
-
- PVAL <- pnorm((z - CORRECTION) / SIGMA)
- if (alternative == "two.sided")
- PVAL <- 2 * min (PVAL, 1 - PVAL)
- else if (alternative == "greater")
- PVAL <- 1 - PVAL
-
- NVAL <- mu
- names(NVAL) <- "mu"
-
- RVAL <- list(statistic = STATISTIC,
- parameter = PARAMETER,
- p.value = PVAL,
- null.value = NVAL,
- alternative = alternative,
- method = METHOD,
- data.name = DNAME)
- class(RVAL) <- "htest"
- return(RVAL)
- }
-