Journal club - 2D or 3D culture?
Chromosome missegregation and the consequent aneuploidy are lethal to normal cells; paradoxically, however, the fitness of cancer cells is enhanced by aneuploidy.
# Differences within or between groups
group_dif <- function(x) {
  # Pairwise absolute differences of the value column, split by whether the
  # two rows carry identical group labels.
  #
  # input: data frame (or matrix); the first n columns are group labels and
  #        the last column is the numeric value.
  # output: list with
  #   same_group_diff    - |value_i - value_j| for pairs whose n group columns
  #                        all match,
  #   between_group_diff - the same quantity for every other pair.
  #   An element is NULL when no pair falls into that category (this includes
  #   inputs with fewer than two rows).
  stopifnot(length(dim(x)) == 2, dim(x)[2] >= 2)
  n_rows <- dim(x)[1]
  n <- dim(x)[2] - 1 # n group columns

  # Preallocate one slot per unordered pair instead of growing with c().
  n_pairs <- max(n_rows * (n_rows - 1) / 2, 0)
  diffs <- vector("list", n_pairs)
  is_same <- logical(n_pairs)

  k <- 0
  # seq_len() yields an empty sequence for 0 or 1 rows, where 1:(n_rows - 1)
  # would count backwards and index past the data.
  for (i in seq_len(max(n_rows - 1, 0))) {
    for (j in (i + 1):n_rows) {
      k <- k + 1
      diffs[[k]] <- abs(x[i, n + 1] - x[j, n + 1])
      # Only pairs where every group column matches count as "same".
      # isTRUE() makes a missing label count as "not the same group"
      # rather than aborting the whole computation.
      is_same[k] <- isTRUE(all(x[i, 1:n] == x[j, 1:n]))
    }
  }

  # unlist() of an empty selection is NULL, matching the original's empty value.
  list(
    "same_group_diff" = unlist(diffs[is_same]),
    "between_group_diff" = unlist(diffs[!is_same])
  )
}
# sampling error and mean (for t test assumption)
sample_Se_Mean <- function(x, n) {
  # Bootstrap the standard error and the mean of a sample, then regress the
  # standard errors on the means.
  #
  # x: vector data, n: bootstrap times (at least 1)
  # output: list with
  #   sampling_errors - sd/sqrt(size) of each bootstrap resample,
  #   sampling_means  - mean of each bootstrap resample,
  #   lmfit           - lm(sampling_errors ~ sampling_means).
  stopifnot(length(x) >= 1, length(n) == 1, n >= 1)

  size <- length(x)
  # Preallocate instead of growing the vectors on every iteration.
  sampling_errors <- numeric(n)
  sampling_means <- numeric(n)

  for (b in seq_len(n)) {
    # Index with sample.int() so a length-one numeric x is resampled as
    # itself; sample(x, ...) would instead draw from 1:x in that case.
    BSsample <- x[sample.int(size, size, replace = TRUE)]
    sampling_errors[b] <- sd(BSsample) / sqrt(length(BSsample))
    sampling_means[b] <- mean(BSsample)
  }

  lmfit <- lm(sampling_errors ~ sampling_means)
  list(
    "sampling_errors" = sampling_errors,
    "sampling_means" = sampling_means,
    "lmfit" = lmfit
  )
}
# bootstrapping CI for binary counting data
bt_bin_CI <- function(x, y, n) {
  # Bootstrap confidence interval for a proportion from binary count data.
  #
  # x: count of category 1, e.g. infected ; y: total count
  # n: bootstrap times (at least 1)
  # method: case resampling - the y observations are resampled with
  #         replacement n times and the category-1 count is recorded.
  # output: list(low_CI, up_CI), the 2.5% and 97.5% quantiles of the
  #         resampled counts divided by y (i.e. on the proportion scale).
  stopifnot(
    length(x) == 1, length(y) == 1, length(n) == 1,
    x >= 0, y >= 1, x <= y, n >= 1
  )

  # TRUE marks a category-1 observation; a logical vector is cheaper to
  # resample and count than the equivalent character labels.
  is_cat1 <- rep(c(TRUE, FALSE), times = c(x, y - x))

  # seq_len() keeps the loop empty-safe, unlike 1:n.
  bt <- vapply(
    seq_len(n),
    function(b) sum(is_cat1[sample.int(y, y, replace = TRUE)]),
    integer(1)
  )

  lower_ci <- quantile(bt, 0.025) / y
  upper_ci <- quantile(bt, 0.975) / y
  list("low_CI" = lower_ci, "up_CI" = upper_ci)
}
#bt_bin_CI(18,80796,100)
# bootstrapping CI for numeric data (between groups)
bt_num_CI <- function(x, n) {
  # Bootstrap confidence interval of the mean for each group.
  #
  # x: data frame, column 1 = group label, column 2 = numeric value
  # n: bootstrap times (at least 1)
  # method: case resampling within each group - a group's values are
  #         resampled with replacement (same size as the group) n times and
  #         the mean of each resample is recorded.
  # output: named list, one entry per group, each list(low_CI, up_CI) holding
  #         the 2.5% and 97.5% quantiles of that group's bootstrap means.
  stopifnot(length(n) == 1, n >= 1)

  # Split the values by group. Resampling must draw from the group's own
  # values; drawing from the pooled column makes every group's interval
  # describe the overall mean instead of the group mean.
  # drop = TRUE skips factor levels that have no rows.
  values_by_group <- split(x[, 2], as.factor(x[, 1]), drop = TRUE)

  lapply(values_by_group, function(values) {
    size <- length(values)
    means <- vapply(
      seq_len(n),
      function(i) mean(values[sample.int(size, size, replace = TRUE)]),
      numeric(1)
    )
    list(
      "low_CI" = quantile(means, 0.025),
      "up_CI" = quantile(means, 0.975)
    )
  })
}
#bt_num_CI(data.frame("Country"=c(rep("A",100),rep("B",100)),"Value"=rnorm(200)),100)
# Read the barley measurements into a numeric vector.
data <- scan("/Users/jefft/Library/Mobile Documents/com~apple~CloudDocs/Year 2/ADS/Week 9 T-test/barley.txt")
# Independent random sampling on continuous variable.
head(data)
## [1] 41.03 45.99 50.01 51.44 49.53 47.47
# Normality of the sampling distribution.
# Draw 100 bootstrap resamples (same size as the data, with replacement) and
# keep the mean of each one.
sampling_means <- replicate(
  100,
  mean(sample(data, size = length(data), replace = TRUE))
)
hist(sampling_means, xlab = "Sample means", main = "")
shapiro.test(sampling_means)
##
## Shapiro-Wilk normality test
##
## data: sampling_means
## W = 0.99381, p-value = 0.9317
# Independence of mean and variation (standard error).
# For each of 100 bootstrap resamples record both the standard error
# (sd / sqrt(size)) and the mean, so the two can be plotted against each other.
sampling_errors <- numeric(100)
sampling_means <- numeric(100)
for (b in seq_len(100)) {
  barley_sample <- sample(data, size = length(data), replace = TRUE)
  sampling_errors[b] <- sd(barley_sample) / sqrt(length(barley_sample))
  sampling_means[b] <- mean(barley_sample)
}
plot(
  sampling_means, sampling_errors,
  xlab = "Sample mean", ylab = "Standard error"
)
# Overlay the least-squares line of standard error on mean.
lmfit <- lm(sampling_errors ~ sampling_means)
abline(lmfit, col = "red")
# One way sample test
# Chi-squared test of three observed counts (84, 82, 34) against the
# hypothesised category probabilities passed in `p` (0.45, 0.43, 0.12).
# The lines starting with `##` below are the pasted console output.
chisq.test(c(84,82,34),p=c(0.45,0.43,0.12))
##
## Chi-squared test for given probabilities
##
## data: c(84, 82, 34)
## X-squared = 4.7527, df = 2, p-value = 0.09289
# Test on 3 way data (test on homogeneity)
# Counts are laid out as a 2 x 2 x 2 array: status x sex x Genotype.
# The data vector fills the array column by column, one Genotype slice at a
# time (WT first, then KO).
ThreeWayData <- array(
  c(40, 9, 34, 7, 20, 15, 25, 20),
  dim = c(2, 2, 2),
  dimnames = list(
    status = c("Alive", "Dead"),
    sex = c("Male", "Female"),
    Genotype = c("WT", "KO")
  )
)
ThreeWayData
## , , Genotype = WT
##
## sex
## status Male Female
## Alive 40 34
## Dead 9 7
##
## , , Genotype = KO
##
## sex
## status Male Female
## Alive 20 25
## Dead 15 20
# summary() on a table reports the chi-squared test for independence of all
# factors.
summary(as.table(ThreeWayData))
## Number of cases in table: 170
## Number of factors: 3
## Test for independence of all factors:
## Chisq = 15.765, df = 4, p-value = 0.003351
# 2 x 2 table of counts: rows are the phenotype (Alive / Dead), columns are
# the genotype (WT / KO).
KO = data.frame("WT"=c(7,3),"KO"=c(2,7),row.names = c("Alive","Dead"))
# Reshape to long format (one row per phenotype/genotype cell with its count)
# so the table can be plotted.
KO_plot = as.data.frame(as.table(as.matrix(KO)))
names(KO_plot) = c("Phenotype","Genotype","Freq")
# One point per cell; both point size and colour are mapped to the count.
# NOTE(review): ggplot()/geom_point() come from ggplot2, and no library(ggplot2)
# call is visible in this part of the file - confirm it is attached earlier.
ggplot(data=KO_plot, aes(x=Genotype,y=Phenotype)) +
geom_point(aes(size=Freq,color=Freq))
# Fisher's exact test on the 2 x 2 count table; the `##` lines are the pasted
# console output.
fisher.test(KO)
##
## Fisher's Exact Test for Count Data
##
## data: KO
## p-value = 0.06978
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.7520079 113.4668907
## sample estimates:
## odds ratio
## 7.166282
# Continuous data
# NOTE(review): from here on `data` is used as a data frame with Height and
# Weight columns; the code that loads it is not visible in this section -
# confirm it replaces the numeric vector read with scan() earlier.
summary(data)
## Height Weight
## Min. :63.43 Min. : 97.9
## 1st Qu.:66.52 1st Qu.:119.9
## Median :67.94 Median :127.9
## Mean :67.95 Mean :127.2
## 3rd Qu.:69.20 3rd Qu.:136.1
## Max. :73.90 Max. :159.0
# "A quick summary shows that both variables are continuous and numerical."
# Linearity
plot(data$Height, data$Weight, xlab = "Height", ylab = "Weight")
# looking at the plot
# Absence of outliers
# looking at the plot. (find #200 be outlier)
# Drop row 200, judged to be an outlier from the scatter plot above.
data = data[-200,]
# Normality distribution of data.
# Shapiro-Wilk test applied to each variable separately.
shapiro.test(data$Height)
##
## Shapiro-Wilk normality test
##
## data: data$Height
## W = 0.99415, p-value = 0.627
shapiro.test(data$Weight)
##
## Shapiro-Wilk normality test
##
## data: data$Weight
## W = 0.99398, p-value = 0.6013
# Test
# Pearson correlation between Height and Weight (run after the outlier row
# was removed).
cor.test(data$Height, data$Weight)
##
## Pearson's product-moment correlation
##
## data: data$Height and data$Weight
## t = 9.508, df = 197, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4574401 0.6492864
## sample estimates:
## cor
## 0.560846
# Continuous data. (same)
# Linearity. (same)
# Absence of outliers. (same)
# Normality distribution of data. (same)
# Equality of variance
# Plot the residuals of Weight ~ Height against Height and overlay the
# least-squares line through them.
fit <- lm(data$Weight ~ data$Height)
with(
  data,
  plot(Height, resid(fit), xlab = "Height", ylab = "Residuals")
)
fit2 <- lm(resid(fit) ~ data$Height)
abline(fit2, col = "red", lwd = 3)
# Test
# Fit the simple linear regression and draw it over the raw scatter plot.
fit <- lm(data$Weight ~ data$Height)
with(
  data,
  plot(Height, Weight, xlab = "Height", ylab = "Weight")
)
abline(fit, col = "firebrick3", lwd = 3)
# Two-factor ANOVA with additive (main-effect only) terms: Time explained by
# Gender and Origin.
# NOTE(review): the `marathon` data frame is not created anywhere in the
# visible source - confirm where it is loaded.
model <- aov(Time ~ Gender+Origin, data=marathon)
# Independent random sampling on continuous variable.
# Guaranteed.
# Normality of the sampling distribution.
# Checked visually with a histogram of the model residuals.
hist(resid(model))
# Homoscedasticity (equality of variances).
# Diagnostic plot 1 of the fitted model (residuals against fitted values).
plot(model,1)
# Test
# ANOVA table; the `##` lines are the pasted console output.
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 5.69 5.689 7.681 0.00621 **
## Origin 1 2.82 2.821 3.809 0.05265 .
## Residuals 167 123.70 0.741
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Chromosome missegregation and the consequent aneuploidy are lethal to normal cells; paradoxically, however, the fitness of cancer cells is enhanced by aneuploidy.
Sister chromatid separation is triggered by cohesin cleavage mediated by separase. Therefore, it is important to keep separase in check. There are two previo...
The cell cycle is an orchestration of colossal molecules in the elaborate cell machine. Analogous to the central dogma, genetic code to protein expression and ...
This is a collection of basic Markdown syntax, adapted from the Markdown Guide.
A simple review of knowledge taught in data science course.