Untitled diff
115 lines
<!--head
<!--head
Title:          Correlations
Title:          Korrelációs együtthatók
Author:         Daróczi Gergely
Author:         Daróczi Gergely
Email:          gergely@snowl.net
Email:          gergely@snowl.net
Description:    This template will return the correlation matrix of supplied numerical variables.
Description:    Folytonos változók közötti lineáris összefüggések vizsgálata. ## TODO: update
Data required:  TRUE
Data required:  TRUE
Example:        rapport('correlations', data=ius2008, vars=c('age', 'edu'))
Strict:         TRUE
		rapport('correlations', data=ius2008, vars=c('age', 'edu', 'leisure'))
Example:        rapport('i18n/hu/correlations', data=ius2008, vars=c('age', 'edu'))
		rapport('correlations', data=mtcars, vars=c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb'))
                rapport('i18n/hu/correlations', data=ius2008, vars=c('age', 'edu', 'leisure'))
                rapport('i18n/hu/correlations', data=mtcars, vars=c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb'))
vars            | *numeric[2,50]| Variable                      | Numerical variables
vars        | *numeric[2,50]| Változók              | Folytonos változók
cor.matrix      | TRUE          | Correlation matrix            | Show correlation matrix (numbers)?
cor.matrix  | TRUE          | Korrelációs mátrix    | Korrelációs mátrix hozzáadása
cor.plot        | TRUE          | Scatterplot matrix            | Show scatterplot matrix (image)?
cor.plot    | TRUE          | Pontdiagram           | Pontdiagram hozzáadása
quick.plot      | TRUE          | Using a sample for plotting   | If set to TRUE, the scatterplot matrix will be drawn on a sample size of max. 1000 cases not to render millions of points.
quick.plot  | TRUE          | Minta ábrázolása      | A teljes adatbázis helyett egy maximum 1000 fős minta kerül ábrázolásra.
head-->
head-->
# Variable description
<%
## Set the 'p.copula' option to the Hungarian conjunction 'és' ("and").
## The chunk ends with NULL so that nothing is exported to the report.
options('p.copula' = 'és'); NULL
%>
<%=length(vars)%> variables provided.
# Változó-információk
<%=length(vars)%> változó vizsgálata:
<%=
<%=
## Correlation matrix of the supplied variables (use = 'complete.obs').
cm <- cor(vars, use = 'complete.obs')
cm <- cor(vars, use = 'complete.obs')
## Blank the diagonal so that the max()/min() lookups in the summary
## sentence that follows skip the self-correlations.
diag(cm) <- NA
diag(cm) <- NA
%>
%>
<%if (length(vars) >2 ) {%>
<%if (length(vars) >2 ) {%>
The highest correlation coefficient (<%=max(cm, na.rm=T)%>) is between <%=row.names(which(cm == max(cm, na.rm=T), arr.ind=T))[1:2]%> and the lowest (<%=min(cm, na.rm=T)%>) is between <%=row.names(which(cm == min(cm, na.rm=T), arr.ind=T))[1:2]%>. It seems that the strongest association (r=<%=cm[which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T)][1]%>) is between <%=row.names(which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T))[1:2]%>.
A legmagasabb korrelációs együtthatót (<%=max(cm, na.rm=T)%>) a(z) <%=row.names(which(cm == max(cm, na.rm=T), arr.ind=T))[1:2]%>, és a legalacsonyabb értéket (<%=min(cm, na.rm=T)%>) a(z) <%=row.names(which(cm == min(cm, na.rm=T), arr.ind=T))[1:2]%> változók között találjuk. Úgy tűnik, hogy a legerősebb kapcsolat (r=<%=cm[which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T)][1]%>) a(z) <%=row.names(which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T))[1:2]%> változók között található.
<%}%>
<%}%>
<%
<%
## Keep every pair only once: the upper triangle is blanked here (the
## diagonal was already set to NA in an earlier chunk).
cm[upper.tri(cm)] <- NA
cm[upper.tri(cm)] <- NA
## Row/column indices of the cells with r > 0.7 or r < -0.7; NA cells are
## dropped by which().
h <- which((cm > 0.7) | (cm < -0.7), arr.ind=T)
h <- which((cm > 0.7) | (cm < -0.7), arr.ind=T)
## The list of pairs is rendered only when at least one such cell exists;
## this `if` is closed in a later chunk.
if (nrow(h) > 0) {
if (nrow(h) > 0) {
%>
%>
Highly correlated (r < -0.7 or r > 0.7) variables:
Erős összefüggést mutató (r < -0.7 vagy r > 0.7) változók:
<%=
<%=
## One entry per row of `h`: the two variable names passed through p(),
## followed by the coefficient rounded to two digits in parentheses.
paste(pander.return(lapply(seq_len(nrow(h)), function(k) {
paste(pander.return(lapply(seq_len(nrow(h)), function(k) {
    i.row <- h[k, 1]
    i.row <- h[k, 1]
    i.col <- h[k, 2]
    i.col <- h[k, 2]
    paste0(p(c(rownames(cm)[i.row], colnames(cm)[i.col])), ' (', round(cm[i.row, i.col], 2), ')')
    paste0(p(c(rownames(cm)[i.row], colnames(cm)[i.col])), ' (', round(cm[i.row, i.col], 2), ')')
})), collapse = '\n')
})), collapse = '\n')
%>
%>
<%} else {%>
<%} else {%>
There are no highly correlated (r < -0.7 or r > 0.7) variables.
Nincsenek erős összefüggést mutató (r < -0.7 vagy r > 0.7) változók.
<%}%>
<%}%>
<%
<%
## Row/column indices of the cells with -0.2 < r < 0.2, taken from the same
## `cm`; this overwrites the `h` computed for the highly correlated pairs.
h <- which((cm < 0.2)&(cm > -0.2), arr.ind=T)
h <- which((cm < 0.2)&(cm > -0.2), arr.ind=T)
## The list of pairs is rendered only when at least one such cell exists;
## this `if` is closed in a later chunk.
if (nrow(h) > 0) {
if (nrow(h) > 0) {
%>
%>
Uncorrelated (-0.2 < r < 0.2) variables:
Korrelálatlan (-0.2 < r < 0.2) változók:
<%=
<%=
## One entry per row of `h`: the two variable names passed through p(),
## followed by the coefficient rounded to two digits in parentheses. The
## entries go through pander.return() and are joined with newlines.
## NOTE(review): this nrow(h) check repeats the condition of the enclosing
## `if` opened in the previous chunk.
if (nrow(h) > 0)
if (nrow(h) > 0)
    paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')
    paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')
%>
%>
<%} else {%>
<%} else {%>
There are no uncorrelated (-0.2 < r < 0.2) variables.
Nincsenek korrelálatlan (-0.2 < r < 0.2) változók.
<%}%>
<%}%>
## <%=if (cor.matrix) 'Correlation matrix'%>
## <%=if (cor.matrix) 'Korrelációs mátrix'%>
<%=
<%=
if (cor.matrix) {
if (cor.matrix) {
    ## Table of correlation coefficients, each followed by stars derived
    ## from the p-value of the corresponding cor.test().
    set.caption('Correlation matrix')
    set.caption('Correlation matrix')
    ## Recomputed here because `cm` was partly overwritten with NA in the
    ## earlier chunks; coefficients are rounded to four digits.
    cm <- round(cor(vars, use = 'complete.obs'), 4)
    cm <- round(cor(vars, use = 'complete.obs'), 4)
    ## NOTE(review): `d` is not read again in the visible part of the template.
    d <- attributes(cm)
    d <- attributes(cm)
    ## Visit every cell by row name and column name.
    for (row in attr(cm, 'dimnames')[[1]])
    for (row in attr(cm, 'dimnames')[[1]])
	for (col in attr(cm, 'dimnames')[[2]]) {
	for (col in attr(cm, 'dimnames')[[2]]) {
	    test.p <- cor.test(vars[, row], vars[, col])$p.value
	    test.p <- cor.test(vars[, row], vars[, col])$p.value
	    ## p > 0.05: no star; p > 0.01: one; p > 0.001: two; otherwise three.
	    ## Storing the pasted text turns `cm` into a character matrix.
	    cm[row, col] <- paste(cm[row, col], ' ', ifelse(test.p > 0.05, '', ifelse(test.p > 0.01, ' ★', ifelse(test.p > 0.001, ' ★★', ' ★★★'))), sep='')
	    cm[row, col] <- paste(cm[row, col], ' ', ifelse(test.p > 0.05, '', ifelse(test.p > 0.01, ' ★', ifelse(test.p > 0.001, ' ★★', ' ★★★'))), sep='')
	}
	}
    ## The self-correlations on the diagonal are shown as empty cells.
    diag(cm) <- ''
    diag(cm) <- ''
    set.alignment('centre', 'right')
    set.alignment('centre', 'right')
    ## Last expression of the `if` block: the matrix as a data frame.
    as.data.frame(cm)
    as.data.frame(cm)
}
}
%>
%>
Where the stars represent the [significance levels](http://en.wikipedia.org/wiki/Statistical_significance) of the bivariate correlation coefficients: one star for `0.05`, two for `0.01` and three for `0.001`.
Ahol a csillagok száma a [szignifikancia szintet](http://en.wikipedia.org/wiki/Statistical_significance) jelöli: egy csillag `0,05`, kettő `0,01` és három csillag `0,001` p értéknél.
<%=
<%=
if (cor.plot) {
if (cor.plot) {
    ## Scatterplot matrix of `vars`: panel.smooth below the diagonal, the
    ## correlation coefficient with its significance mark (panel.cor) above.
    labels <- lapply(vars, rp.name)
    labels <- lapply(vars, rp.name)
    ## Rows are sampled only when quick.plot is set AND there are more than
    ## 1000 of them. The flag is kept so that the caption mentions the
    ## sample only when one was really drawn.
    sampled <- quick.plot && nrow(vars) > 1000
    sampled <- quick.plot && nrow(vars) > 1000
    if (sampled) {
    if (sampled) {
        vars <- vars[sample(seq_len(nrow(vars)), size = 1000), ]
        vars <- vars[sample(seq_len(nrow(vars)), size = 1000), ]
    }
    }
    ## custom panels
    ## custom panels
    panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
    panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
        ## forked from ?pairs
        ## forked from ?pairs
        par(usr = c(0, 1, 0, 1))
        par(usr = c(0, 1, 0, 1))
        r   <- cor(x, y, use = 'complete.obs')
        r   <- cor(x, y, use = 'complete.obs')
        txt <- format(c(r, 0.123456789), digits = digits)[1]
        txt <- format(c(r, 0.123456789), digits = digits)[1]
        txt <- paste0(prefix, txt)
        txt <- paste0(prefix, txt)
        ## Use a caller-supplied cex.cor when given; otherwise size the text
        ## from its width. `cex` is therefore always defined below.
        cex <- if (missing(cex.cor)) 0.8 / strwidth(txt) else cex.cor
        cex <- if (missing(cex.cor)) 0.8 / strwidth(txt) else cex.cor
        test <- cor.test(x, y)
        test <- cor.test(x, y)
        Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
        Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
                         cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
                         cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
                         symbols = c("***", "**", "*", ".", " "))
                         symbols = c("***", "**", "*", ".", " "))
        ## Coefficient text scaled by |r|; significance mark drawn in colour 2.
        text(0.5, 0.5, txt, cex = cex * abs(r) * 1.5)
        text(0.5, 0.5, txt, cex = cex * abs(r) * 1.5)
        text(.8, .8, Signif, cex = cex, col = 2)
        text(.8, .8, Signif, cex = cex, col = 2)
    }
    }
    ## plot
    ## plot
    set.caption(sprintf('Scatterplot matrix%s', if (sampled) ' (based on a sample size of 1000)' else ''))
    set.caption(sprintf('Pontdiagram%s', if (sampled) ' (n = 1000)' else ''))
    pairs(vars, lower.panel = 'panel.smooth', upper.panel = 'panel.cor', labels = labels)
    pairs(vars, lower.panel = 'panel.smooth', upper.panel = 'panel.cor', labels = labels)
}
}
%>
%>