#############################################################################################################
#
#  warmup.r
#
#############################################################################################################

# For single commands, you can work directly in the console (after the > prompt).
# As soon as you deal with a sequence of commands, it is better to create a script file.

# Open the help page of a function, given its name.
help(mean)
# `?` is a shorthand for help().
?mean
# Run the code of the "Examples" section of the help page.
example(mean)

# Do not hesitate to comment your script (using symbol #), especially when it becomes complicated.

# Now I apply Einstein's relativity to compute the curvature of my space-time

# The comment above deliberately does not match the code: a wrong comment is worse than none.
# Note that this creates a variable named `c`, like the base function c(). Calls such as
# c(1, 2) keep working because R skips non-function objects when it looks up a function name.
c <- 1 # Well, maybe I'm wrong...

# Try to use R as a calculator, all basic mathematical symbols exist.

# Parentheses group sub-expressions as in ordinary arithmetic.
(1+2)*(5-3)
# ^ is exponentiation.
(5/2)^3
sqrt(25)
abs(-5)
# Trigonometric functions take their argument in radians (pi/3 is 60 degrees).
cos(pi/3) # Degrees or Radians ?
# %% is the modulo operator: the remainder of the integer division.
19%%3 # What does it mean ?

# Use an arrow to assign a value to a variable. Look at the variable's content directly
# by its name or through the print function.

# `<-` stores the value on its right in the variable on its left.
a <- 2
print(a)
b <- -3.2
# Typing a variable name alone at the top level prints its value.
b

# Standard types are automatically handled : integer, float, complex, boolean, string, etc.
# Use class to check the type of a variable. Belonging to the same class provides access
# to the same functions (just like object-oriented programming).

# A literal such as 2 is stored as a double. Write 2L to get a true integer,
# whose class is "integer".
exInt <- 2
class(exInt)
exFlo <- pi
class(exFlo) # Both floats and integers are numeric
# The suffix i marks the imaginary part of a complex number.
exCom <- 1+1i
class(exCom) # But complex numbers are not numeric...
# Logical constants are TRUE and FALSE (class "logical").
exBoo <- TRUE
class(exBoo)
# Strings are written between quotes (class "character").
exStr <- "hello"
class(exStr)

# Try to manipulate and combine them, using the comparison operators and their specific functions.

# exInt*exFlo is 2*pi, i.e. a full turn (360 degrees) expressed in radians.
deg360 <- exInt*exFlo
# Mod() is the modulus of a complex number. Comparing floating-point results with ==
# is fragile in general; all.equal() compares with a tolerance.
Mod(exCom) == sqrt(2)
deg360 < pi/2
# Re() and Im() extract the real and imaginary parts, so the result here is 0+0i.
exCom - Re(exCom) - 1i*Im(exCom)
# ! is the logical negation.
!exBoo
# paste() concatenates strings, inserting `sep` between them.
paste(exStr, "everybody", sep=" ")
# Deliberate error: + is not defined for character strings. Run this line interactively;
# when the file is executed with source(), execution stops here.
exStr + exInt # Hmm, what was I thinking ?

# R will happily carry out mathematically questionable operations (an advantage or a drawback,
# depending on the point of view). Special values exist to represent such results, so do not
# hesitate to check your calculations.

# Dividing a non-zero number by zero gives Inf (or -Inf), not an error.
bigValue <- 1/0
is.finite(bigValue)
is.infinite(bigValue)
# 0/0 is undefined and gives NaN ("Not a Number").
undefValue <- 0/0
is.nan(undefValue) # What is NaN ?
# The square root of a negative real number is NaN, with a warning.
sqrt(-1)
is.nan(1i)
# Comparing with NaN gives NA (a missing value): neither TRUE nor FALSE.
1i == sqrt(-1) # What is NA ?
# With a complex argument, sqrt() returns the complex root and the comparison is meaningful.
0+1i == sqrt(-1+0i)

# A vector in R is treated as a column of values. Some shortcut functions exist to deal
# with values having a logical progression, and usual operations and comparisons of vectors are available.

# c() combines its arguments into a vector.
V1 <- c(1, 2, 5, -1, 2)
# a:b is the sequence of integers from a to b.
V2 <- 1:5
c(V1, V2)
# * works element by element (see %*% below for the scalar product).
V1*V2 # Is it a scalar product ?
length(V1)
# Indices start at 1.
V1[3]
# Reading past the end of a vector gives NA instead of an error.
V2[10] # Why ?
# seq() builds a regular sequence: from -5 to 5 in steps of 2.
V3 <- seq(-5, 5, by=2)
# The : operator also counts downwards.
10:0
# t() turns the vector into a matrix with a single row.
t(10:0) # What's the difference ?
# rep() repeats a value: six times the value 1.
V4 <- rep(1, 6)
# Comparisons are element by element too and return a logical vector.
V3 >= V4
# %*% between two vectors of the same length is the scalar (inner) product,
# returned as a 1 x 1 matrix.
V3%*%V4
# which() returns the indices at which the condition is TRUE.
which(V3 < 2)
# sort() returns a sorted copy; V3 itself is not modified.
sort(V3)
sort(V3, decreasing=TRUE)

# Sometimes we need to change the length of a vector dynamically,
# because we do not know in advance how much data will be stored.

# c() without arguments is NULL, which has length 0.
EmptyVec <- c() # Empty vector
length(EmptyVec)
# Appending with c() returns a new, longer vector that must be assigned back.
EmptyVec <- c(EmptyVec, 0)
length(EmptyVec) # Note that () =/= (0)
EmptyVec <- c(EmptyVec, 1)
# A negative index removes the element at that position (here the first one).
EmptyVec <- EmptyVec[-1]
length(EmptyVec)

# We create a matrix from a vector, specifying the number of rows or columns needed.

# By default the values fill the matrix column by column: with ncol=2, M is 3 x 2.
M <- matrix(c(2, 3, 5, 7, 11, 13), ncol=2)
M
dim(M)
nrow(M)
ncol(M)
# byrow=TRUE fills the matrix row by row instead.
N <- matrix(c(2, 3, 5, 7, 11, 13), ncol=2, byrow=TRUE)
N
# A single value is recycled to fill the whole matrix.
Z5 <- matrix(0, nrow=5, ncol=5)
# diag() depends on its argument: a single number gives the identity matrix of that size,
# a vector gives a diagonal matrix, and a matrix gives back its diagonal as a vector.
I5 <- diag(5)
diag(1:5)
diag(I5) # What's the difference ?
# Indexing is [row, column].
M[1,2]
# Deliberate error: M has only 2 columns, so this is a "subscript out of bounds" error
# (unlike vectors, which return NA). With source(), execution stops here.
M[3,4] # Why ?
# An empty index selects everything along that dimension: here the whole third row.
M[3,]
M[2:3,2]
# A negative index removes the corresponding row.
M[-2,]

# We add rows or columns using rbind and cbind.
# As with vectors, this makes it possible to change the dimensions of a matrix dynamically.

# rbind() stacks the rows of its arguments: M and N are both 3 x 2, so the result is 6 x 2.
rbind(M, N)
# cbind() puts the columns side by side: the result is 3 x 4.
cbind(M, N)

# Like vectors, usual operations and comparisons of matrices are available.
# As the examples below show, they have to be used carefully.

# + - / * and ^ all work element by element on matrices of the same dimensions.
M+N
M-N
M/N # What is this strange division between matrices ?
M*N
# %*% is the matrix product and t() the transpose: t(M) is 2 x 3 and N is 3 x 2,
# so the product is 2 x 2.
t(M)%*%N # What's the difference ?
M^2 # Is it a matrix product ?
A <- matrix(c(1, 3, 2, -4), nrow=2)
# eigen() returns a list whose components are reached with $values and $vectors.
eigen(A) # How to access values and vectors separately ?
det(A)
# solve(A, b) solves A %*% x = b; when b is omitted it is taken as the identity matrix,
# so the result is the inverse of A.
solve(A) # Why `solve' to invert ?
# A comparison with a single value is done element by element and returns a logical matrix.
A == 1

# A list is a generic vector that may contain different objects having a label.

# Values of different types that will be gathered in a single list.
V <- c(158, 124, 182)
a <- 22
s <- 1.85
n <- "Jon Snow"
# Each element of the list is given a label with the `Label = value` syntax.
Indiv <- list(Name = n, Size = s, Age = a, KilledEnemies = V, isAStark = TRUE)
Indiv
# [[k]] extracts the k-th element; $Label extracts an element by its label.
Indiv[[1]]
Indiv$Age
# Add 1 to the first value of the KilledEnemies vector, in place inside the list.
Indiv[[4]][1] <- Indiv[[4]][1]+1 # The fourth element of the list is a vector
Indiv$KilledEnemies
# For a list, summary() reports the length, class and mode of each element.
summary(Indiv)

# A data frame is a generalized matrix whose columns may have different types; rows and columns are labelled.

# The single value given for C1 is recycled to match the 10 rows of the other columns.
DF <- data.frame(C1 = 1, C2 = 1:10, C3 = letters[1:10])
DF
colnames(DF)
dim(DF)
# Rows are selected (or removed, with a negative index) as for matrices.
DF[3:5,]
DF[-10,]
# rbind() returns a new data frame with the extra row; the result is not assigned here,
# so DF itself is left untouched.
rbind(DF, c(1, 1, "a"))
DF # Why didn't it change ?
# This time the result is assigned back, so DF really gets a fourth column.
DF <- cbind(DF, 10:1)
# Rename the new column, then label the rows R1, ..., R10.
colnames(DF)[4] <- "C2inv"
rownames(DF) <- paste("R", 1:10, sep="")
# Rows and columns can be selected by their labels.
DF["R3","C2inv"]

# For numeric vectors, descriptive statistics are easily handled with the numerous associated functions.

# Draw n independent Gaussian values with mean 3 and standard deviation 2.
n <- 1000
X <- rnorm(n, mean=3, sd=2)
m <- mean(X)
# var() divides by n-1 (unbiased estimator), so it matches the second formula below,
# not the first one.
var(X)
sum((X-m)^2)/n # The difference ?
sum((X-m)^2)/(n-1)
median(X)
# Without `probs`, quantile() returns the minimum, the three quartiles and the maximum.
quantile(X)
quantile(X, probs=c(0.3, 0.6, 0.9))
min(X)
max(X)

# Like the usual programming languages, R is able to deal with conditions and loops.
# Note that we use == to test for equality whereas we use != to test for difference
# and <, <=, >, >= to test for comparisons.

a <- 1
b <- 2
# == tests equality and returns TRUE or FALSE; a single = would assign instead.
(a == 1) # Essential, crucial : see the difference between `a = 1' and `a == 1'
(b == 1)
(a != 1)
# | is the logical "or", & the logical "and", ! the negation.
(a == 1) | (b == 1)
(a == 1) & (b == 1)
!(b == 1)
# Both sides express the same test in two different ways, hence the result is TRUE.
(b != 1) == !(b == 1) # What ??
# Here both conditions are TRUE: | gives TRUE, whereas xor() (exclusive or) gives FALSE.
(a == 1) | (b == 2)
xor((a == 1), (b == 2)) # What's the difference between `or' and `xor' ?

# The syntax is if (cond) { instr } else { instr } where the else block is optional.
# An ifelse shortcut is also available.

# Let's flip a coin
# runif(1) draws one value uniformly in (0, 1); it falls below 0.5 with probability 1/2.
x <- runif(1)
if (x < 0.5) {
    print("Heads")
} else {
    print("Tails")
}

# ifelse(test, yes, no) returns `yes` where the test is TRUE and `no` otherwise.
# A new random value is drawn here, so the outcome may differ from the one above.
ifelse(runif(1) < 0.5, "Heads", "Tails")

# The syntax is for (var in seq) { instr }.

# Let's enumerate the alphabet
# seq_along(letters) yields the indices 1, 2, ..., length(letters). Unlike 1:length(x),
# it stays empty when the vector is empty, so the loop body is then skipped.
for (i in seq_along(letters)) {
    print(letters[i])
}

# Note that the sequence is not necessarily numeric, for example we can look through a list.

# What are the registered properties of Indiv ?
# Looping over a list visits its elements one by one: `prop` takes the value of each
# element of Indiv in turn (the values, not the labels).
for (prop in Indiv) {
    print(prop)
}

# The syntax is while (cond) { instr }.

# Let's compute the sum of the first n terms of a geometric sequence
# q is the common ratio and n the number of terms; s accumulates the sum
# and i is the current exponent.
q <- 1/3
n <- 20
s <- 0
i <- 0
# Add q^0, q^1, ..., q^(n-1): the loop stops as soon as i reaches n.
while (i <= (n-1)) {
    s <- s+q^i
    i <- i+1
}
print(paste("Sum :", s))
# Closed-form expression of the same sum, for comparison.
print((1-q^n)/(1-q)) # Faster ?

# The syntax is repeat { instr if (cond) { break } } : the loop only stops when break is reached.

# Let's compute the terms of an arithmetic sequence until it exceeds N
# r is the common difference and N the threshold; s is the current term
# and i counts the number of steps.
r <- 1/3
N <- 100
s <- 0
i <- 0
# repeat has no condition of its own: the test on s and the break end the loop.
repeat {
    s <- s + r
    i <- i+1
    if (s > N) {
        break
    }
}
# i is the index of the first term greater than N, and s is that term.
print(paste("Index :", i))
print(paste("Value :", s))

## Functions

# We can also define our own functions. The syntax is name <- function(arg) { instr; return(var) },
# where the return command is optional. Some examples are provided below.

# If your function does not need to return any value, then do not use the return command.

# Flip n coins with heads probability p and print the outcome of each flip.
#
# Arguments:
#   n : number of coins to flip, a non-negative integer. Nothing is printed when n is 0.
#   p : probability of getting heads on each flip, between 0 and 1.
#
# The function is called for its printed output (one "Heads" or "Tails" line per flip)
# and does not return a useful value.
flipcoins <- function(n, p) {
    # seq_len(n) is empty when n is 0, whereas 1:n would be c(1, 0) and would
    # wrongly flip two coins. A negative n is now an error instead of a silent
    # countdown loop.
    for (i in seq_len(n)) {
        x <- runif(1)
        if (x < p) {
            print("Heads")
        } else {
            print("Tails")
        }
    }
}

# Heads is rare (p = 0.1), fair (p = 0.5), then frequent (p = 0.9).
# The outcomes are random and change from one run to the next.
flipcoins(10, 0.1)
flipcoins(15, 0.5)
flipcoins(2, 0.9)

# Use return(val) to return the result of a treatment in your function.

# Bind 3 vectors side by side into a single matrix, one column per vector.
#
# Arguments:
#   V1, V2, V3 : the vectors used as first, second and third column.
#
# Returns the matrix built by cbind(); its columns are named V1, V2 and V3.
concat <- function(V1, V2, V3) {
    return(cbind(V1, V2, V3))
}

# The three unit vectors give the 3 x 3 identity matrix.
M <- concat(c(1,0,0), c(0,1,0), c(0,0,1))
# M is then overwritten by a 10 x 3 matrix of random columns (Gaussian, uniform, binomial).
M <- concat(rnorm(10), runif(10), rbinom(10,5,0.2))

# A simple method to produce more than one output is to create a list with all required variables.

# Estimate the mean and the variance of a sample.
#
# Arguments:
#   Sample : the vector of observations.
#
# Returns a list with two labelled elements: Mean (result of mean()) and
# Var (result of var()).
estimMV <- function(Sample) {
    return(list(Mean = mean(Sample), Var = var(Sample)))
}

# Gaussian sample with mean 1 and standard deviation sqrt(3), i.e. variance 3:
# the estimates should be close to 1 and 3.
Est <- estimMV(rnorm(100, 1, sqrt(3)))
print(Est$Mean)
print(Est$Var)

# Uniform sample on [-2, 2], whose mean is 0 and whose variance is 4^2/12 = 4/3.
Est <- estimMV(runif(100, -2, 2))
print(Est$Mean)
print(Est$Var)

## Basic graphic tools

# The usual functions applied to the 2D graphical representations are plot, lines, curve and points.
# Do not hesitate to look at help(plot) to get an overview of the numerous opportunities.

# Try to change pch, col, type, lwd or lty arguments. Look also at xlim,
# ylim, main, xlab or ylab to decorate the graph.

# Discrete representation of f(x) = ln(x^2 + 1/x^2)
# Evaluate f on a regular grid of step 0.01 between -4 and 4.
# NOTE(review): f is not defined at x = 0 and the grid passes through (or extremely
# close to) 0, so Y presumably holds an infinite or very large value there - confirm.
X <- seq(-4, 4, by=0.01)
Y <- log(X^2+1/X^2)
# Each call to plot() starts a new graph. By default one symbol is drawn per point.
plot(X, Y, col="blue")

# pch selects the symbol, type="l" joins the points with a line, main sets the title,
# lwd the line width, lty the line type, xlab and ylab the axis labels.
plot(X, Y, col="blue", pch=3)
plot(X, Y, col="blue", type="l", main="Graph")
plot(X, Y, col="magenta", type="l", lwd=3, lty=2, xlab="Abs. X", ylab="Ord. Y")

# Discrete representations of f(x) = ln(x^2 + 1/x^2) and g(x) = -x^2+6
Z <- -X^2+6
plot(X, Y, col="magenta", type="l", lwd=3, lty=2, xlab="Abs. X", ylab="Ord. Y")
# lines() adds a curve to the current graph instead of starting a new one.
lines(X, Z, type="l", lwd=3, col="red")

# That's... nonsense, really
# Two unrelated random samples (Gaussian and exponential), only used to show
# how elements are added to an existing graph.
X <- rnorm(20)
Y <- rexp(20)
plot(X, Y, col="blue", type="p")
# points() adds symbols to the current graph (here the same points, slightly shifted).
points(X+0.1, Y+0.1 , pch=2, col="red")
# lines() adds a broken line; sorting X but not Y pairs the values arbitrarily.
lines(sort(X), Y, lty=2, col="orange")
# text() writes a label at the given coordinates.
text(mean(X), max(Y), "Hello", col="magenta")

# Use of `curve' to get continuous representations of functions of x
# curve() evaluates an expression of x between `from` and `to`. xlim and ylim set the
# visible window, so only the part between -4 and 4 is shown here.
curve(sin(x), from=-2*pi, to=2*pi, col="red", lwd=2, xlim=c(-4, 4), ylim=c(-1, 1))
# add=TRUE draws on the current graph instead of starting a new one.
curve(cos(x), from=-2*pi, to=2*pi, col="blue", lwd=2, add=TRUE)

# Add a grid
grid(col="lightgray", lty="dotted")

# Same example as above, with its legend

# Rebuild the grid and both functions, since X and Y were overwritten by the previous examples.
X <- seq(-4, 4, by=0.01)
Y <- log(X^2+1/X^2)
Z <- -X^2+6
plot(X, Y, col="magenta", type="l", lwd=3, lty=2, xlab="Abs. X", ylab="Ord. Y")
lines(X, Z, type="l", lwd=3, col="red")
# The col, lwd and lty vectors of the legend follow the order of the labels and
# repeat the settings used to draw each curve.
legend("topright", c("f(x)", "g(x)"), col=c("magenta", "red"), lwd=c(3, 3), lty=c(2, 1))

## Statistical tools

# Histograms, boxplots, regression lines, kernel densities, ... are also easily available using R.
# Here are some examples.

# Histogram, density and boxplot of a standard Gaussian sample

# 1000 values drawn from the standard Gaussian distribution.
X <- rnorm(1000)
# freq=FALSE puts the histogram on the density scale, so that the estimated
# density curve added below is comparable with the bars.
hist(X, breaks=15, col="lightblue", border="blue", freq=FALSE, xlim=c(-4,4))
# density() computes a kernel density estimate; lines() draws it over the histogram.
lines(density(X), col="red", lwd=2, lty=2)

# The boxplot is drawn as a new graph.
boxplot(X, main="Boxplot of X", col=c("gold"))

# Regression line of a scatter plot

# Simulate the linear model Y = 2 + 2.5*X + E, where E is a standard Gaussian noise.
X <- 0.5*rnorm(100)
E <- rnorm(100)
Y <- 2 + 2.5*X + E
plot(X, Y, type="p", pch=3)
# lm(Y~X) fits the regression of Y on X by least squares; the estimated intercept
# and slope should be close to 2 and 2.5.
LinReg <- lm(Y~X)
# summary() prints the estimated coefficients with their standard errors and tests.
# NOTE(review): the fitted line is not drawn in the lines shown here; abline(LinReg)
# would add it to the scatter plot - confirm whether this is done further down.
summary(LinReg)