SSiz corr coeff

Content Disclaimer
Copyright @2020.
All Rights Reserved.

StatsToDo: Sample Size for Estimating Pearson's Correlation Coefficient ρ

Links : Home Index (Subjects) Contact StatsToDo

Explanations and References Sample Size Table Javascript Program

Input Data

Sample Size : Data input a table of 3 columns
  - Each row contains data from a separate study
  - Col 1 = probability of Type I error (alpha)
  - Col 2 = power (1-beta)
  - Col 3 = expected correlation coefficient ρ

Power : Data input a table of 3 columns
  - Each row contains data from a separate study
  - Col 1 = probability of Type I error (α)
  - Col 2 = sample size in the study
  - Col 3 = correlation coefficient (ρ) observed

Confidence Intervals : Data input a table of 3 columns
  - Each row contains data from a separate study
  - Col 1 = percent confidence (usually 95)
  - Col 2 = sample size
  - Col 3 = correlation coefficient (ρ) observed

Pilot Studies : Data input a single plan, a single column with 4 rows
  -Row 1 : Percent Confidence required, usually 95 or 99
  -Row 2 : Planned Correlation coefficient (ρ)
  -Row 3 : Sample Size Interval, usually 5
  -Row 4 : Maximum Sample size, usually 50

R Codes

#  Sample Size
# subroutine
#' Sample size needed to detect a Pearson correlation coefficient.
#'
#' Repeats n = (za + zb)^2 / mu^2 + 3, where mu is the Fisher z transform of
#' r plus the correction term r / (2 * (n - 1)), until two successive values
#' of n differ by no more than 0.00001.
#'
#' @param alpha Probability of Type I error.
#' @param beta  Probability of Type II error (1 - power).
#' @param r     Expected correlation coefficient; only its magnitude is used.
#' @param tail  Divisor applied to alpha: 1 for a one-tail, 2 for a two-tail
#'   calculation.
#' @return The sample size rounded up to a whole number, or 0 when the
#'   magnitude of r is below 0.00001 or above 0.99999, or when the iteration
#'   has not converged after 1000 steps.
SSizRho <- function(alpha, beta, r, tail) {
  r <- abs(r)
  if (r < 0.00001 || r > 0.99999) {
    return(0)
  }

  tolerance <- 0.00001
  max_iter <- 1000

  gamma <- (qnorm(alpha / tail) + qnorm(beta))^2
  # Fisher z of r does not change between iterations, so compute it once.
  fisher_z <- 0.5 * log((1.0 + r) / (1.0 - r))

  n <- fisher_z  # starting value used by the original algorithm
  oldn <- 0
  iterate <- 0
  while (iterate < max_iter && abs(oldn - n) > tolerance) {
    oldn <- n
    mu <- fisher_z + r / (2.0 * (n - 1))
    n <- gamma / mu^2 + 3.0
    iterate <- iterate + 1
  }

  # Test convergence itself rather than the iteration count, so a run that
  # converges exactly on the last permitted step is not reported as a failure.
  if (abs(oldn - n) > tolerance) {
    return(0)
  }
  ceiling(n)
}
# main ssiz program
# data entry: one row per study
#   Alpha = probability of Type I error, Power = 1 - beta,
#   Rho = expected correlation coefficient
dat <- ("
Alpha Power Rho
0.05	0.8	  0.6
0.01	0.8	  0.6
0.05	0.9	  0.6
0.01	0.9	  0.6
       ")
# read.table(text = ...) manages its own connection, so none is left open
df <- read.table(text = dat, header = TRUE)  # conversion to data frame

# calculations: sample size for every row, for a given number of tails.
# seq_len() yields an empty sequence for an empty table, where 1:nrow(df)
# would wrongly visit rows 1 and 0.
ssiz_by_row <- function(tail) {
  vapply(
    seq_len(nrow(df)),
    function(i) SSizRho(df$Alpha[i], 1 - df$Power[i], df$Rho[i], tail),
    numeric(1)
  )
}
SSiz1Tail <- ssiz_by_row(1) # 1 tail
SSiz2Tail <- ssiz_by_row(2) # 2 tail

# results to data frame for display
df$SSiz1Tail <- SSiz1Tail
df$SSiz2Tail <- SSiz2Tail
df # data frame with input data and results

The results are as follows

Alpha = probability of Type I Error α, p
Power = 1 - β where β = probability of Type II Error
Rho = correlation coefficient
SSiz = sample size, 1 and 2 tail

> df # data frame with input data and results
  Alpha Power Rho SSiz1Tail SSiz2Tail
1  0.05   0.8 0.6        16        19
2  0.01   0.8 0.6        24        27
3  0.05   0.9 0.6        21        25
4  0.01   0.9 0.6        30        34

# Program 2: Power
# Computes the 1 and 2 tail power of each study from its Type I error,
# sample size and observed correlation coefficient.
# data entry: one row per study
dat <- ("
Alpha SSiz  Rho
0.05  16    0.6
0.01  24    0.6
0.05  21    0.6
0.01  30    0.6
       ")
# read.table(text = ...) manages its own connection, so none is left open
df <- read.table(text = dat, header = TRUE)  # conversion to data frame

# Only the magnitude of rho matters, as in the sample size subroutine.
rho_abs <- abs(df$Rho)
n_obs <- df$SSiz

# Rows that cannot be calculated keep a power of 0.  (The original loop wrote
# these zeros to misspelt vectors, Power1Rail / Power2Rail, so the result
# vectors came out shorter than the data frame.)
calculable <- !(n_obs < 4 | rho_abs < 0.00001 | rho_abs > 0.99999)
Power1Tail <- numeric(nrow(df))
Power2Tail <- numeric(nrow(df))

# Calculations, vectorised over the calculable rows
r_ok <- rho_abs[calculable]
n_ok <- n_obs[calculable]
a_ok <- df$Alpha[calculable]
mu <- 0.5 * log((1.0 + r_ok) / (1.0 - r_ok)) + (r_ok / (2.0 * (n_ok - 1.0)))
shift <- mu * sqrt(n_ok - 3)
Power1Tail[calculable] <- pnorm(shift - abs(qnorm(a_ok)))      # 1 tail
Power2Tail[calculable] <- pnorm(shift - abs(qnorm(a_ok / 2)))  # 2 tail

# combine results into data frame for display
df$Power1Tail <- Power1Tail
df$Power2Tail <- Power2Tail
df # show data input and power results

The results are as follows

Alpha = probability of Type I Error α, p
SSiz = sample size of data
Rho = correlation coefficient observed ρ
Power = 1 - β, 1 and 2 tail

> df # show data input and power results
  Alpha SSiz Rho Power1Tail Power2Tail
1  0.05   16 0.6  0.8228900  0.7295077
2  0.01   24 0.6  0.8185424  0.7454831
3  0.05   21 0.6  0.9130155  0.8518614
4  0.01   30 0.6  0.9080931  0.8598448

Program 3: Confidence Interval. Firstly, the subroutine used by both 1 and 2 tail estimates

#' Confidence interval of a correlation coefficient via Fisher's z transform.
#'
#' The coefficient is transformed with atanh(), the interval is built on the
#' z scale with half width za / sqrt(n - 3), and both limits are transformed
#' back with tanh().  tanh() replaces the hand-written
#' (exp(2x) - 1) / (exp(2x) + 1), which becomes Inf / Inf = NaN once exp()
#' overflows (for example when n is 3).
#'
#' @param pc   Percent confidence, e.g. 95.
#' @param n    Sample size.
#' @param r    Correlation coefficient.
#' @param tail Divisor applied to alpha = 1 - pc / 100 before the normal
#'   quantile is taken (called with 1 and 2 in this file).
#' @return Numeric vector c(lower limit, upper limit) on the correlation scale.
ConfIntv <- function(pc, n, r, tail) {  # %conf, ssiz, rho, tail
  alpha <- 1 - pc / 100
  za <- abs(qnorm(alpha / tail))
  z <- atanh(r)                    # same as log((1 + r) / (1 - r)) / 2
  half_width <- za / sqrt(n - 3)   # za times the standard error of z
  c(tanh(z - half_width), tanh(z + half_width))
}

Now the main program

# Main Program 3: Confidence Interval
# Adds the 1 and 2 tail confidence limits of each study's correlation
# coefficient to the input table.
# data entry: one row per study
dat <- ("
Pc SSiz Rho
95  16  0.6
99  24  0.6
95  21  0.6
99  30  0.6
       ")
# read.table(text = ...) manages its own connection, so none is left open
df <- read.table(text = dat, header = TRUE)  # conversion to data frame

# Calculations: limits for every row, for a given number of tails.
# The result is a matrix with one column per study: row 1 holds the lower
# limits, row 2 the upper limits.  seq_len() copes with an empty table,
# where 1:nrow(df) would wrongly visit rows 1 and 0.
limits_by_row <- function(tail) {
  vapply(
    seq_len(nrow(df)),
    function(i) ConfIntv(df$Pc[i], df$SSiz[i], df$Rho[i], tail),
    numeric(2)
  )
}
limits1 <- limits_by_row(1) # 1 tail
limits2 <- limits_by_row(2) # 2 tail
LL1Tail <- limits1[1, ]  # lower limit 1 tail
UL1Tail <- limits1[2, ]  # upper limit 1 tail
LL2Tail <- limits2[1, ]  # lower limit 2 tail
UL2Tail <- limits2[2, ]  # upper limit 2 tail

# combine input data and results for display
df$LL1Tail <- LL1Tail
df$UL1Tail <- UL1Tail
df$LL2Tail <- LL2Tail
df$UL2Tail <- UL2Tail
df # Input data and confidence interval results

The results are as follows

Pc = % confidence of interpretation
SSiz = sample size of data
Rho = correlation coefficient observed ρ
LL = lower limit and UL = upper limit of confidence interval, 1 and 2 tail

> df # Input data and confidence interval results
  Pc SSiz Rho   LL1Tail   UL1Tail   LL2Tail   UL2Tail
1 95   16 0.6 0.2326099 0.8175378 0.1484459 0.8445242
2 99   24 0.6 0.1833978 0.8338978 0.1303100 0.8497463
3 95   21 0.6 0.2962935 0.7935115 0.2271470 0.8194415
4 99   30 0.6 0.2406287 0.8147011 0.1949028 0.8302268

Program 4: Pilot study

Firstly the subroutine for confidence interval, which is the same as that for Program 3

#' Confidence interval of a correlation coefficient via Fisher's z transform.
#'
#' The coefficient is transformed with atanh(), the interval is built on the
#' z scale with half width za / sqrt(n - 3), and both limits are transformed
#' back with tanh().  tanh() replaces the hand-written
#' (exp(2x) - 1) / (exp(2x) + 1), which becomes Inf / Inf = NaN once exp()
#' overflows (for example when n is 3).
#'
#' @param pc   Percent confidence, e.g. 95.
#' @param n    Sample size.
#' @param r    Correlation coefficient.
#' @param tail Divisor applied to alpha = 1 - pc / 100 before the normal
#'   quantile is taken (called with 1 and 2 in this file).
#' @return Numeric vector c(lower limit, upper limit) on the correlation scale.
ConfIntv <- function(pc, n, r, tail) {  # %conf, ssiz, rho, tail
  alpha <- 1 - pc / 100
  za <- abs(qnorm(alpha / tail))
  z <- atanh(r)                    # same as log((1 + r) / (1 - r)) / 2
  half_width <- za / sqrt(n - 3)   # za times the standard error of z
  c(tanh(z - half_width), tanh(z + half_width))
}

Now the main program

# Pgm 4 : Pilot studies
# Tabulates how the width of the confidence interval (upper limit minus lower
# limit) shrinks as the sample size grows in steps of intv, 1 and 2 tail.
# Parameters
pc <- 95         # % confidence
rho <- 0.6       # correlation coefficient rho
intv <- 5        # interval
maxN <- 100      # maximum sample size
stopifnot(intv > 0)  # a non-positive step would never reach maxN

# Sample sizes intv, 2 * intv, ... that do not exceed maxN.  The first row is
# always produced, even when intv itself is larger than maxN.  (The original
# loop added one more row beyond maxN whenever maxN was not a multiple of
# intv.)
SSiz <- seq(from = intv, to = max(intv, maxN), by = intv)     # sample size

# Builds the four display columns for one tail setting.  Values are formatted
# as text; the first row has no previous row to compare with and shows "0".
pilot_columns <- function(tail) {
  width <- vapply(
    SSiz,
    function(n) {
      limits <- ConfIntv(pc, n, rho, tail)
      limits[2] - limits[1]
    },
    numeric(1)
  )
  previous <- width[-length(width)]   # width on the previous row
  shrink <- previous - width[-1]      # difference in CI from previous row
  per_case <- shrink / intv           # decrease in CI per case increase
  list(
    CI = sprintf("%#.4f", width),
    Diff = c("0", sprintf("%#.4f", shrink)),
    DecCase = c("0", sprintf("%#.4f", per_case)),
    PDCase = c("0", sprintf("%#.1f", per_case / previous * 100))
  )
}

# Calculations
one_tail <- pilot_columns(1)
CI1 <- one_tail$CI            # confidence interval 1 tail
Diff1 <- one_tail$Diff        # difference in CI from previous row 1 tail
DecCase1 <- one_tail$DecCase  # decrease in CI per case increase 1 tail
PDCase1 <- one_tail$PDCase    # % decrease in CI per case increase 1 tail

two_tail <- pilot_columns(2)
CI2 <- two_tail$CI            # confidence interval 2 tail
Diff2 <- two_tail$Diff        # difference in CI from previous row 2 tail
DecCase2 <- two_tail$DecCase  # decrease in CI per case increase 2 tail
PDCase2 <- two_tail$PDCase    # % decrease in CI per case increase 2 tail

df <- data.frame(SSiz,CI1,Diff1,DecCase1,PDCase1,CI2,Diff2,DecCase2,PDCase2)
df # display results in data frame

The results are as follows

SSiz = sample size used (per group)
CI1 and CI2 = confidence interval for that sample size (1 and 2 tail)
Diff1 and Diff2 = difference in CI from previous row (1 and 2 tail)
DecCase1 and DecCase2 = decrease in CI per case increase (1 and 2 tail)
PDCase1 and PDCase2 = % decrease in CI per case increase, based on the previous row (1 and 2 tail)

> df # display results in data frame
   SSiz    CI1  Diff1 DecCase1 PDCase1    CI2  Diff2 DecCase2 PDCase2
1     5 1.3905      0        0       0 1.5690      0        0       0
2    10 0.7942 0.5963   0.1193     8.6 0.9401 0.6289   0.1258     8.0
3    15 0.6087 0.1855   0.0371     4.7 0.7241 0.2160   0.0432     4.6
4    20 0.5116 0.0971   0.0194     3.2 0.6094 0.1147   0.0229     3.2
5    25 0.4497 0.0619   0.0124     2.4 0.5359 0.0735   0.0147     2.4
6    30 0.4059 0.0438   0.0088     1.9 0.4837 0.0521   0.0104     1.9
7    35 0.3728 0.0331   0.0066     1.6 0.4443 0.0394   0.0079     1.6
8    40 0.3466 0.0261   0.0052     1.4 0.4132 0.0312   0.0062     1.4
9    45 0.3253 0.0213   0.0043     1.2 0.3878 0.0254   0.0051     1.2
10   50 0.3075 0.0178   0.0036     1.1 0.3665 0.0212   0.0042     1.1
11   55 0.2923 0.0152   0.0030     1.0 0.3484 0.0181   0.0036     1.0
12   60 0.2792 0.0131   0.0026     0.9 0.3327 0.0157   0.0031     0.9
13   65 0.2677 0.0115   0.0023     0.8 0.3190 0.0137   0.0027     0.8
14   70 0.2575 0.0102   0.0020     0.8 0.3069 0.0122   0.0024     0.8
15   75 0.2483 0.0091   0.0018     0.7 0.2960 0.0109   0.0022     0.7
16   80 0.2401 0.0082   0.0016     0.7 0.2862 0.0098   0.0020     0.7
17   85 0.2327 0.0074   0.0015     0.6 0.2773 0.0089   0.0018     0.6
18   90 0.2259 0.0068   0.0014     0.6 0.2692 0.0081   0.0016     0.6
19   95 0.2197 0.0062   0.0012     0.6 0.2618 0.0074   0.0015     0.6
20  100 0.2139 0.0057   0.0011     0.5 0.2550 0.0068   0.0014     0.5

StatsToDo: Sample Size for Estimating Pearson's Correlation Coefficient ρ

Fisher's Z Transformation

1 or 2 tail Models

Sample Size for Correlation and Regression

Sample size for non-parametric Correlation Coefficients

References