-34 Removals
+40 Additions
1<!--head 1<!--head
2Title: Correlations 2Title: Korrelációs együtthatók
3Author: Daróczi Gergely 3Author: Daróczi Gergely
4Email: gergely@snowl.net 4Email: gergely@snowl.net
5Description: This template will return the correlation matrix of supplied numerical variables. 5Description: Folytonos változók közötti lineáris összefüggések vizsgálata. ## TODO: update
6Data required: TRUE 6Data required: TRUE
7Example: rapport('correlations', data=ius2008, vars=c('age', 'edu')) 7Strict: TRUE
8 rapport('correlations', data=ius2008, vars=c('age', 'edu', 'leisure')) 8Example: rapport('i18n/hu/correlations', data=ius2008, vars=c('age', 'edu'))
9 rapport('correlations', data=mtcars, vars=c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb')) 9 rapport('i18n/hu/correlations', data=ius2008, vars=c('age', 'edu', 'leisure'))
10 rapport('i18n/hu/correlations', data=mtcars, vars=c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb'))
10 11
11vars | *numeric[2,50]| Variable | Numerical variables 12vars | *numeric[2,50]| Változók | Folytonos változók
12cor.matrix | TRUE | Correlation matrix | Show correlation matrix (numbers)? 13cor.matrix | TRUE | Korrelációs mátrix | Korrelációs mátrix hozzáadása
13cor.plot | TRUE | Scatterplot matrix | Show scatterplot matrix (image)? 14cor.plot | TRUE | Pontdiagram | Pontdiagram hozzáadása
14quick.plot | TRUE | Using a sample for plotting | If set to TRUE, the scatterplot matrix will be drawn on a sample size of max. 1000 cases not to render millions of points. 15quick.plot | TRUE | Minta ábrázolása | A teljes adatbázis helyett egy maximum 1000 fős minta kerül ábrázolásra.
15head--> 16head-->
16 17
17# Variable description 18<%
19## setting Hungarian locale options and returning NULL so that nothing gets exported to the report
20options('p.copula' = 'és'); NULL
21%>
18 22
19<%=length(vars)%> variables provided. 23# Változó-információk
24
25<%=length(vars)%> változó vizsgálata:
20 26
21<%= 27<%=
22cm <- cor(vars, use = 'complete.obs') 28cm <- cor(vars, use = 'complete.obs')
23diag(cm) <- NA 29diag(cm) <- NA
24%> 30%>
25 31
26<%if (length(vars) >2 ) {%> 32<%if (length(vars) >2 ) {%>
27The highest correlation coefficient (<%=max(cm, na.rm=T)%>) is between <%=row.names(which(cm == max(cm, na.rm=T), arr.ind=T))[1:2]%> and the lowest (<%=min(cm, na.rm=T)%>) is between <%=row.names(which(cm == min(cm, na.rm=T), arr.ind=T))[1:2]%>. It seems that the strongest association (r=<%=cm[which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T)][1]%>) is between <%=row.names(which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T))[1:2]%>. 33A legmagasabb korrelációs együtthatót (<%=max(cm, na.rm=T)%>) a(z) <%=row.names(which(cm == max(cm, na.rm=T), arr.ind=T))[1:2]%>, és a legalacsonyabb értéket (<%=min(cm, na.rm=T)%>) a(z) <%=row.names(which(cm == min(cm, na.rm=T), arr.ind=T))[1:2]%> változók között találjuk. Úgy tűnik, hogy a legerősebb kapcsolat (r=<%=cm[which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T)][1]%>) a(z) <%=row.names(which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T))[1:2]%> változók között található.
28<%}%> 34<%}%>
29 35
30<% 36<%
31cm[upper.tri(cm)] <- NA 37cm[upper.tri(cm)] <- NA
32h <- which((cm > 0.7) | (cm < -0.7), arr.ind=T) 38h <- which((cm > 0.7) | (cm < -0.7), arr.ind=T)
33if (nrow(h) > 0) { 39if (nrow(h) > 0) {
34%> 40%>
35 41
36Highly correlated (r < -0.7 or r > 0.7) variables: 42Erős összefüggést mutató (r < -0.7 vagy r > 0.7) változók:
37 43
38<%=paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')%> 44<%=paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')%>
39 45
40<%} else {%> 46<%} else {%>
41 47
42There are no highly correlated (r < -0.7 or r > 0.7) variables. 48Nincsenek erős összefüggést mutató (r < -0.7 vagy r > 0.7) változók.
43<%}%> 49<%}%>
44 50
45<% 51<%
46h <- which((cm < 0.2)&(cm > -0.2), arr.ind=T) 52h <- which((cm < 0.2)&(cm > -0.2), arr.ind=T)
47if (nrow(h) > 0) { 53if (nrow(h) > 0) {
48%> 54%>
49 55
50Uncorrelated (-0.2 < r < 0.2) variables: 56Korrelálatlan (-0.2 < r < 0.2) változók:
51 57
52<%= 58<%=
53if (nrow(h) > 0) 59if (nrow(h) > 0)
54 paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n') 60 paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')
55%> 61%>
56 62
57<%} else {%> 63<%} else {%>
58 64
59There are no uncorrelated (-0.2 < r < 0.2) variables. 65Nincsenek korrelálatlan (-0.2 < r < 0.2) változók.
60<%}%> 66<%}%>
61 67
62## <%=if (cor.matrix) 'Correlation matrix'%> 68## <%=if (cor.matrix) 'Korrelációs mátrix'%>
63 69
64<%= 70<%=
65if (cor.matrix) { 71if (cor.matrix) {
66 set.caption('Correlation matrix') 72 set.caption('Correlation matrix')
67 cm <- round(cor(vars, use = 'complete.obs'), 4) 73 cm <- round(cor(vars, use = 'complete.obs'), 4)
68 d <- attributes(cm) 74 d <- attributes(cm)
69 for (row in attr(cm, 'dimnames')[[1]]) 75 for (row in attr(cm, 'dimnames')[[1]])
70 for (col in attr(cm, 'dimnames')[[2]]) { 76 for (col in attr(cm, 'dimnames')[[2]]) {
71 test.p <- cor.test(vars[, row], vars[, col])$p.value 77 test.p <- cor.test(vars[, row], vars[, col])$p.value
72 cm[row, col] <- paste(cm[row, col], ' ', ifelse(test.p > 0.05, '', ifelse(test.p > 0.01, ' ★', ifelse(test.p > 0.001, ' ★★', ' ★★★'))), sep='') 78 cm[row, col] <- paste(cm[row, col], ' ', ifelse(test.p > 0.05, '', ifelse(test.p > 0.01, ' ★', ifelse(test.p > 0.001, ' ★★', ' ★★★'))), sep='')
73 } 79 }
74 diag(cm) <- '' 80 diag(cm) <- ''
75 set.alignment('centre', 'right') 81 set.alignment('centre', 'right')
76 as.data.frame(cm) 82 as.data.frame(cm)
77} 83}
78%> 84%>
79 85
80Where the stars represent the [significance levels](http://en.wikipedia.org/wiki/Statistical_significance) of the bivariate correlation coefficients: one star for `0.05`, two for `0.01` and three for `0.001`. 86Ahol a csillagok száma a [szignifikancia szintet](http://en.wikipedia.org/wiki/Statistical_significance) jelöli: egy csillag `0,05`, kettő `0,01` és három csillag `0,001` p értéknél.
81 87
82<%= 88<%=
83if (cor.plot) { 89if (cor.plot) {
84 90
85 labels <- lapply(vars, rp.name) 91 labels <- lapply(vars, rp.name)
86 92
87 if (quick.plot) 93 if (quick.plot)
88 if (nrow(vars) > 1000) 94 if (nrow(vars) > 1000)
89 vars <- vars[sample(1:nrow(vars), size = 1000), ] 95 vars <- vars[sample(1:nrow(vars), size = 1000), ]
90 96
91 ## custom panels 97 ## custom panels
92 panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) { 98 panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
93 99
94 ## forked from ?pairs 100 ## forked from ?pairs
95 par(usr = c(0, 1, 0, 1)) 101 par(usr = c(0, 1, 0, 1))
96 r <- cor(x, y, use = 'complete.obs') 102 r <- cor(x, y, use = 'complete.obs')
97 txt <- format(c(r, 0.123456789), digits = digits)[1] 103 txt <- format(c(r, 0.123456789), digits = digits)[1]
98 txt <- paste(prefix, txt, sep = "") 104 txt <- paste(prefix, txt, sep = "")
99 if(missing(cex.cor)) 105 if(missing(cex.cor))
100 cex <- 0.8/strwidth(txt) 106 cex <- 0.8/strwidth(txt)
101 test <- cor.test(x,y) 107 test <- cor.test(x,y)
102 Signif <- symnum(test$p.value, corr = FALSE, na = FALSE, 108 Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
103 cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), 109 cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
104 symbols = c("***", "**", "*", ".", " ")) 110 symbols = c("***", "**", "*", ".", " "))
105 text(0.5, 0.5, txt, cex = cex * abs(r) * 1.5) 111 text(0.5, 0.5, txt, cex = cex * abs(r) * 1.5)
106 text(.8, .8, Signif, cex = cex, col = 2) 112 text(.8, .8, Signif, cex = cex, col = 2)
107 } 113 }
108 114
109 ## plot 115 ## plot
110 set.caption(sprintf('Scatterplot matrix%s', ifelse(quick.plot, ' (based on a sample size of 1000)', ''))) 116 set.caption(sprintf('Pontdiagram%s', ifelse(quick.plot, ' (n = 1000)', '')))
111 pairs(vars, lower.panel = 'panel.smooth', upper.panel = 'panel.cor', labels = labels) 117 pairs(vars, lower.panel = 'panel.smooth', upper.panel = 'panel.cor', labels = labels)
112 118
113} 119}
114%> 120%>
Editor
Original Text
Changed Text
Recommended videos