34 removals
40 additions
1.<!--head 1.<!--head
2.Title: Correlations 2.Title: Korrelációs együtthatók
3.Author: Daróczi Gergely 3.Author: Daróczi Gergely
4.Email: gergely@snowl.net 4.Email: gergely@snowl.net
5.Description: This template will return the correlation matrix of supplied numerical variables. 5.Description: Folytonos változók közötti lineáris összefüggések vizsgálata. ## TODO: update
6.Data required: TRUE 6.Data required: TRUE
7.Example: rapport('correlations', data=ius2008, vars=c('age', 'edu')) 7.Strict: TRUE
8. rapport('correlations', data=ius2008, vars=c('age', 'edu', 'leisure')) 8.Example: rapport('i18n/hu/correlations', data=ius2008, vars=c('age', 'edu'))
9. rapport('correlations', data=mtcars, vars=c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb')) 9. rapport('i18n/hu/correlations', data=ius2008, vars=c('age', 'edu', 'leisure'))
10. rapport('i18n/hu/correlations', data=mtcars, vars=c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb'))
10. 11.
11.vars | *numeric[2,50]| Variable | Numerical variables 12.vars | *numeric[2,50]| Változók | Folytonos változók
12.cor.matrix | TRUE | Correlation matrix | Show correlation matrix (numbers)? 13.cor.matrix | TRUE | Korrelációs mátrix | Korrelációs mátrix hozzáadása
13.cor.plot | TRUE | Scatterplot matrix | Show scatterplot matrix (image)? 14.cor.plot | TRUE | Pontdiagram | Pontdiagram hozzáadása
14.quick.plot | TRUE | Using a sample for plotting | If set to TRUE, the scatterplot matrix will be drawn on a sample size of max. 1000 cases not to render millions of points. 15.quick.plot | TRUE | Minta ábrázolása | A teljes adatbázis helyett egy maximum 1000 fős minta kerül ábrázolásra.
15.head--> 16.head-->
16. 17.
17.# Variable description 18.<%
19.## setting the Hungarian copula ('és') and returning NULL so that nothing is exported to the report
20.options('p.copula' = 'és'); NULL
21.%>
18. 22.
19.<%=length(vars)%> variables provided. 23.# Változó-információk
24.
25.<%=length(vars)%> változó vizsgálata:
20. 26.
21.<%= 27.<%=
22.cm <- cor(vars, use = 'complete.obs') 28.cm <- cor(vars, use = 'complete.obs')
23.diag(cm) <- NA 29.diag(cm) <- NA
24.%> 30.%>
25. 31.
26.<%if (length(vars) >2 ) {%> 32.<%if (length(vars) >2 ) {%>
27.The highest correlation coefficient (<%=max(cm, na.rm=T)%>) is between <%=row.names(which(cm == max(cm, na.rm=T), arr.ind=T))[1:2]%> and the lowest (<%=min(cm, na.rm=T)%>) is between <%=row.names(which(cm == min(cm, na.rm=T), arr.ind=T))[1:2]%>. It seems that the strongest association (r=<%=cm[which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T)][1]%>) is between <%=row.names(which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T))[1:2]%>. 33.A legmagasabb korrelációs együtthatót (<%=max(cm, na.rm=T)%>) a(z) <%=row.names(which(cm == max(cm, na.rm=T), arr.ind=T))[1:2]%>, és a legalacsonyabb értéket (<%=min(cm, na.rm=T)%>) a(z) <%=row.names(which(cm == min(cm, na.rm=T), arr.ind=T))[1:2]%> változók között találjuk. Úgy tűnik, hogy a legerősebb kapcsolat (r=<%=cm[which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T)][1]%>) a(z) <%=row.names(which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T))[1:2]%> változók között található.
28.<%}%> 34.<%}%>
29. 35.
30.<% 36.<%
31.cm[upper.tri(cm)] <- NA 37.cm[upper.tri(cm)] <- NA
32.h <- which((cm > 0.7) | (cm < -0.7), arr.ind=T) 38.h <- which((cm > 0.7) | (cm < -0.7), arr.ind=T)
33.if (nrow(h) > 0) { 39.if (nrow(h) > 0) {
34.%> 40.%>
35. 41.
36.Highly correlated (r < -0.7 or r > 0.7) variables: 42.Erős összefüggést mutató (r < -0.7 vagy r > 0.7) változók:
37. 43.
38.<%=paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')%> 44.<%=paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')%>
39. 45.
40.<%} else {%> 46.<%} else {%>
41. 47.
42.There are no highly correlated (r < -0.7 or r > 0.7) variables. 48.Nincsenek erős összefüggést mutató (r < -0.7 vagy r > 0.7) változók.
43.<%}%> 49.<%}%>
44. 50.
45.<% 51.<%
46.h <- which((cm < 0.2)&(cm > -0.2), arr.ind=T) 52.h <- which((cm < 0.2)&(cm > -0.2), arr.ind=T)
47.if (nrow(h) > 0) { 53.if (nrow(h) > 0) {
48.%> 54.%>
49. 55.
50.Uncorrelated (-0.2 < r < 0.2) variables: 56.Korrelálatlan (-0.2 < r < 0.2) változók:
51. 57.
52.<%= 58.<%=
53.if (nrow(h) > 0) 59.if (nrow(h) > 0)
54. paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n') 60. paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')
55.%> 61.%>
56. 62.
57.<%} else {%> 63.<%} else {%>
58. 64.
59.There are no uncorrelated (-0.2 < r < 0.2) variables. 65.Nincsenek korrelálatlan (-0.2 < r < 0.2) változók.
60.<%}%> 66.<%}%>
61. 67.
62.## <%=if (cor.matrix) 'Correlation matrix'%> 68.## <%=if (cor.matrix) 'Korrelációs mátrix'%>
63. 69.
64.<%= 70.<%=
65.if (cor.matrix) { 71.if (cor.matrix) {
66. set.caption('Correlation matrix') 72. set.caption('Correlation matrix')
67. cm <- round(cor(vars, use = 'complete.obs'), 4) 73. cm <- round(cor(vars, use = 'complete.obs'), 4)
68. d <- attributes(cm) 74. d <- attributes(cm)
69. for (row in attr(cm, 'dimnames')[[1]]) 75. for (row in attr(cm, 'dimnames')[[1]])
70. for (col in attr(cm, 'dimnames')[[2]]) { 76. for (col in attr(cm, 'dimnames')[[2]]) {
71. test.p <- cor.test(vars[, row], vars[, col])$p.value 77. test.p <- cor.test(vars[, row], vars[, col])$p.value
72. cm[row, col] <- paste(cm[row, col], ' ', ifelse(test.p > 0.05, '', ifelse(test.p > 0.01, ' ★', ifelse(test.p > 0.001, ' ★★', ' ★★★'))), sep='') 78. cm[row, col] <- paste(cm[row, col], ' ', ifelse(test.p > 0.05, '', ifelse(test.p > 0.01, ' ★', ifelse(test.p > 0.001, ' ★★', ' ★★★'))), sep='')
73. } 79. }
74. diag(cm) <- '' 80. diag(cm) <- ''
75. set.alignment('centre', 'right') 81. set.alignment('centre', 'right')
76. as.data.frame(cm) 82. as.data.frame(cm)
77.} 83.}
78.%> 84.%>
79. 85.
80.Where the stars represent the [significance levels](http://en.wikipedia.org/wiki/Statistical_significance) of the bivariate correlation coefficients: one star for `0.05`, two for `0.01` and three for `0.001`. 86.Ahol a csillagok száma a [szignifikancia szintet](http://en.wikipedia.org/wiki/Statistical_significance) jelöli: egy csillag `0,05`, kettő `0,01` és három csillag `0,001` p értéknél.
81. 87.
82.<%= 88.<%=
83.if (cor.plot) { 89.if (cor.plot) {
84. 90.
85. labels <- lapply(vars, rp.name) 91. labels <- lapply(vars, rp.name)
86. 92.
87. if (quick.plot) 93. if (quick.plot)
88. if (nrow(vars) > 1000) 94. if (nrow(vars) > 1000)
89. vars <- vars[sample(1:nrow(vars), size = 1000), ] 95. vars <- vars[sample(1:nrow(vars), size = 1000), ]
90. 96.
91. ## custom panels 97. ## custom panels
92. panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) { 98. panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
93. 99.
94. ## forked from ?pairs 100. ## forked from ?pairs
95. par(usr = c(0, 1, 0, 1)) 101. par(usr = c(0, 1, 0, 1))
96. r <- cor(x, y, use = 'complete.obs') 102. r <- cor(x, y, use = 'complete.obs')
97. txt <- format(c(r, 0.123456789), digits = digits)[1] 103. txt <- format(c(r, 0.123456789), digits = digits)[1]
98. txt <- paste(prefix, txt, sep = "") 104. txt <- paste(prefix, txt, sep = "")
99. if(missing(cex.cor)) 105. if(missing(cex.cor))
100. cex <- 0.8/strwidth(txt) 106. cex <- 0.8/strwidth(txt)
101. test <- cor.test(x,y) 107. test <- cor.test(x,y)
102. Signif <- symnum(test$p.value, corr = FALSE, na = FALSE, 108. Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
103. cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), 109. cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
104. symbols = c("***", "**", "*", ".", " ")) 110. symbols = c("***", "**", "*", ".", " "))
105. text(0.5, 0.5, txt, cex = cex * abs(r) * 1.5) 111. text(0.5, 0.5, txt, cex = cex * abs(r) * 1.5)
106. text(.8, .8, Signif, cex = cex, col = 2) 112. text(.8, .8, Signif, cex = cex, col = 2)
107. } 113. }
108. 114.
109. ## plot 115. ## plot
110. set.caption(sprintf('Scatterplot matrix%s', ifelse(quick.plot, ' (based on a sample size of 1000)', ''))) 116. set.caption(sprintf('Pontdiagram%s', ifelse(quick.plot, ' (n = 1000)', '')))
111. pairs(vars, lower.panel = 'panel.smooth', upper.panel = 'panel.cor', labels = labels) 117. pairs(vars, lower.panel = 'panel.smooth', upper.panel = 'panel.cor', labels = labels)
112. 118.
113.} 119.}
114.%> 120.%>
original text
changed text