<!--head <!--head
Title: Correlations Title: Korrelációs együtthatók
Author: Daróczi Gergely Author: Daróczi Gergely
Email: gergely@snowl.net Email: gergely@snowl.net
Description: This template will return the correlation matrix of supplied numerical variables. Description: Folytonos változók közötti lineáris összefüggések vizsgálata. ## TODO: update
Data required: TRUE Data required: TRUE
Example: rapport('correlations', data=ius2008, vars=c('age', 'edu')) Strict: TRUE
rapport('correlations', data=ius2008, vars=c('age', 'edu', 'leisure')) Example: rapport('i18n/hu/correlations', data=ius2008, vars=c('age', 'edu'))
rapport('correlations', data=mtcars, vars=c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb')) rapport('i18n/hu/correlations', data=ius2008, vars=c('age', 'edu', 'leisure'))
rapport('i18n/hu/correlations', data=mtcars, vars=c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb'))
vars | *numeric[2,50]| Variable | Numerical variables vars | *numeric[2,50]| Változók | Folytonos változók
cor.matrix | TRUE | Correlation matrix | Show correlation matrix (numbers)? cor.matrix | TRUE | Korrelációs mátrix | Korrelációs mátrix hozzáadása
cor.plot | TRUE | Scatterplot matrix | Show scatterplot matrix (image)? cor.plot | TRUE | Pontdiagram | Pontdiagram hozzáadása
quick.plot | TRUE | Using a sample for plotting | If set to TRUE, the scatterplot matrix will be drawn on a sample size of max. 1000 cases not to render millions of points. quick.plot | TRUE | Minta ábrázolása | A teljes adatbázis helyett egy maximum 1000 fős minta kerül ábrázolásra.
head--> head-->
# Variable description <%
## setting Hungarian locale and returning NULL not be exported to report
options('p.copula' = 'és'); NULL
%>
<%=length(vars)%> variables provided. # Változó-információk
<%=length(vars)%> változó vizsgálata:
<%= <%=
cm <- cor(vars, use = 'complete.obs') cm <- cor(vars, use = 'complete.obs')
diag(cm) <- NA diag(cm) <- NA
%> %>
<%if (length(vars) >2 ) {%> <%if (length(vars) >2 ) {%>
The highest correlation coefficient (<%=max(cm, na.rm=T)%>) is between <%=row.names(which(cm == max(cm, na.rm=T), arr.ind=T))[1:2]%> and the lowest (<%=min(cm, na.rm=T)%>) is between <%=row.names(which(cm == min(cm, na.rm=T), arr.ind=T))[1:2]%>. It seems that the strongest association (r=<%=cm[which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T)][1]%>) is between <%=row.names(which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T))[1:2]%>. A legmagasabb korrelációs együtthatót (<%=max(cm, na.rm=T)%>) a(z) <%=row.names(which(cm == max(cm, na.rm=T), arr.ind=T))[1:2]%>, és a legalacsonyabb értéket (<%=min(cm, na.rm=T)%>) a(z) <%=row.names(which(cm == min(cm, na.rm=T), arr.ind=T))[1:2]%> változók között találjuk. Úgy tűnik, hogy a legerősebb kapcsolat (r=<%=cm[which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T)][1]%>) a(z) <%=row.names(which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T))[1:2]%> változók között található.
<%}%> <%}%>
<% <%
cm[upper.tri(cm)] <- NA cm[upper.tri(cm)] <- NA
h <- which((cm > 0.7) | (cm < -0.7), arr.ind=T) h <- which((cm > 0.7) | (cm < -0.7), arr.ind=T)
if (nrow(h) > 0) { if (nrow(h) > 0) {
%> %>
Highly correlated (r < -0.7 or r > 0.7) variables: Erős összefüggést mutató (r < -0.7 or r > 0.7) változók:
<%=paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')%> <%=paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')%>
<%} else {%> <%} else {%>
There are no highly correlated (r < -0.7 or r > 0.7) variables. Nincsenek erős összefüggést mutató (r < -0.7 or r > 0.7) változók.
<%}%> <%}%>
<% <%
h <- which((cm < 0.2)&(cm > -0.2), arr.ind=T) h <- which((cm < 0.2)&(cm > -0.2), arr.ind=T)
if (nrow(h) > 0) { if (nrow(h) > 0) {
%> %>
Uncorrelated (-0.2 < r < 0.2) variables: Korrelálatlan (-0.2 < r < 0.2) változók:
<%= <%=
if (nrow(h) > 0) if (nrow(h) > 0)
paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n') paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')
%> %>
<%} else {%> <%} else {%>
There are no uncorrelated correlated (r < -0.2 or r > 0.2) variables. Nincsenek korrelálatlan (-0.2 < r < 0.2) változók.
<%}%> <%}%>
## <%=if (cor.matrix) 'Correlation matrix'%> ## <%=if (cor.matrix) 'Korrelációs mátrix'%>
<%= <%=
if (cor.matrix) { if (cor.matrix) {
set.caption('Correlation matrix') set.caption('Correlation matrix')
cm <- round(cor(vars, use = 'complete.obs'), 4) cm <- round(cor(vars, use = 'complete.obs'), 4)
d <- attributes(cm) d <- attributes(cm)
for (row in attr(cm, 'dimnames')[[1]]) for (row in attr(cm, 'dimnames')[[1]])
for (col in attr(cm, 'dimnames')[[2]]) { for (col in attr(cm, 'dimnames')[[2]]) {
test.p <- cor.test(vars[, row], vars[, col])$p.value test.p <- cor.test(vars[, row], vars[, col])$p.value
cm[row, col] <- paste(cm[row, col], ' ', ifelse(test.p > 0.05, '', ifelse(test.p > 0.01, ' ★', ifelse(test.p > 0.001, ' ★★', ' ★★★'))), sep='') cm[row, col] <- paste(cm[row, col], ' ', ifelse(test.p > 0.05, '', ifelse(test.p > 0.01, ' ★', ifelse(test.p > 0.001, ' ★★', ' ★★★'))), sep='')
} }
diag(cm) <- '' diag(cm) <- ''
set.alignment('centre', 'right') set.alignment('centre', 'right')
as.data.frame(cm) as.data.frame(cm)
} }
%> %>
Where the stars represent the [significance levels](http://en.wikipedia.org/wiki/Statistical_significance) of the bivariate correlation coefficients: one star for `0.05`, two for `0.01` and three for `0.001`. Ahol a csillagok száma a [szignifikancia szintet](http://en.wikipedia.org/wiki/Statistical_significance) jelöli: egy csillag `0,05`, kettő `0,01` és három csillag `0.001` p értéknél.
<%= <%=
if (cor.plot) { if (cor.plot) {
labels <- lapply(vars, rp.name) labels <- lapply(vars, rp.name)
if (quick.plot) if (quick.plot)
if (nrow(vars) > 1000) if (nrow(vars) > 1000)
vars <- vars[sample(1:nrow(vars), size = 1000), ] vars <- vars[sample(1:nrow(vars), size = 1000), ]
## custom panels ## custom panels
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) { panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
## forked from ?pairs ## forked from ?pairs
par(usr = c(0, 1, 0, 1)) par(usr = c(0, 1, 0, 1))
r <- cor(x, y, use = 'complete.obs') r <- cor(x, y, use = 'complete.obs')
txt <- format(c(r, 0.123456789), digits = digits)[1] txt <- format(c(r, 0.123456789), digits = digits)[1]
txt <- paste(prefix, txt, sep = "") txt <- paste(prefix, txt, sep = "")
if(missing(cex.cor)) if(missing(cex.cor))
cex <- 0.8/strwidth(txt) cex <- 0.8/strwidth(txt)
test <- cor.test(x,y) test <- cor.test(x,y)
Signif <- symnum(test$p.value, corr = FALSE, na = FALSE, Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
symbols = c("***", "**", "*", ".", " ")) symbols = c("***", "**", "*", ".", " "))
text(0.5, 0.5, txt, cex = cex * abs(r) * 1.5) text(0.5, 0.5, txt, cex = cex * abs(r) * 1.5)
text(.8, .8, Signif, cex = cex, col = 2) text(.8, .8, Signif, cex = cex, col = 2)
} }
## plot ## plot
set.caption(sprintf('Scatterplot matrix%s', ifelse(quick.plot, ' (based on a sample size of 1000)', ''))) set.caption(sprintf('Pontdiagram%s', ifelse(quick.plot, ' (n = 1000)', '')))
pairs(vars, lower.panel = 'panel.smooth', upper.panel = 'panel.cor', labels = labels) pairs(vars, lower.panel = 'panel.smooth', upper.panel = 'panel.cor', labels = labels)
} }
%> %>
original text
changed text