Heading 1
DS - ANOVA
# One-way ANOVA on three treatment groups with seven observations each.
y1 <- c(18.2, 20.1, 17.6, 16.8, 18.8, 19.7, 19.1)
y2 <- c(17.4, 18.7, 19.1, 16.4, 15.9, 18.4, 17.7)
y3 <- c(15.2, 18.8, 17.7, 16.5, 15.9, 17.1, 16.7)
y <- c(y1, y2, y3)

# Group sizes, and the group label that belongs to every observation in y.
n <- rep(7, 3)
n
group <- rep(1:3, n)
group

# Stem-and-leaf display per group and for the pooled sample. stem() returns
# NULL, so the tapply() result is assigned only to keep it from printing.
tmp <- tapply(y, group, stem)
stem(y)

# Sum, mean, variance and count of a numeric vector.
tmpfn <- function(x) {
  c(sum = sum(x), mean = mean(x), var = var(x), n = length(x))
}
tapply(y, group, tmpfn)
tmpfn(y)

# Fit the one-way model and compute the ANOVA table once.
data <- data.frame(y = y, group = factor(group))
fit <- lm(y ~ group, data)
anova_tbl <- anova(fit)
anova_tbl

# Degrees of freedom for treatment and error, taken from the table.
df <- anova_tbl[, "Df"]
names(df) <- c("trt", "err")
df

# Upper-tail critical F values at the 5% and 1% levels.
alpha <- c(0.05, 0.01)
qf(alpha, df["trt"], df["err"], lower.tail = FALSE)

# Residual sum of squares and a 95% confidence interval for the error
# variance. The error degrees of freedom come from the fit instead of being
# hard-coded, so the interval stays correct if the data change.
sse <- anova_tbl["Residuals", "Sum Sq"]
sse
sigma2_ci <- sse / qchisq(c(0.025, 0.975), df[["err"]], lower.tail = FALSE)
sigma2_ci
DS - CLUSTERING
​
"k-means clustering" data("iris") names(iris) new_data<-subset(iris,select = c(-Species)) new_data cl=kmeans(new_data,3) data <- new_data wss <- sapply(1:15,function(k){kmeans(data,k)$tot.withinss}) wss plot(1:15,wss,type="b",pch=19,frame=FALSE,xlab="Number of cluster k",ylab = "Total within-cluster sum of squares") install.packages("cluster") library(cluster) clusplot(new_data , cl$cluster , color=TRUE , shade=TRUE , labels = 2 , line=0) cl$cluster cl$clusters "agglomarative clustering" cluster <- hclust(dist(iris[,3:4])) plot(cluster) clusterCut <- cutree(cluster,3) table(clusterCut,iris$Species) install.packages("ggplot2",dependencies = TRUE) library(ggplot2) ggplot(iris,aes(Petal.Length,Petal.Width,color=iris$Species))+geom_point(alpha=0.4,size=3.5)+geom+point(col=clusterCut)+scale_color_manual(values=c('black','red','green')) cluster <- hclust(dist(iris[,3:4]),method = 'average') clusterCut1 <- cutree(clusters,3) table(clusterCut1,iris$Species) plot(clusters) install.packages("ggplot2") library(ggplot2) ggplot2(iris,aes(Petal.Length,Petal.Width,color=iris$Species))+geom_point(alpha=0.4,size=3.5)+geom+point(col=clusterCut1)+scale_color_manual(values=c('pink','red','yellow'))
DS - DECISION TREE
​
# Classification trees for iris Species using three packages: rpart, tree
# and party. Every model receives `data = mydata` explicitly, so attach()
# is not needed and is no longer called.
mydata <- data.frame(iris)

# ---- rpart ----
if (!requireNamespace("rpart", quietly = TRUE)) {
  install.packages("rpart")
}
library(rpart)
model <- rpart(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata, method = "class"
)
plot(model)
text(model, use.n = TRUE, all = TRUE, cex = 0.8)

# ---- tree ----
if (!requireNamespace("tree", quietly = TRUE)) {
  install.packages("tree")
}
library(tree)
# `method = "class"` is an rpart argument and has been dropped here:
# tree() picks classification from the factor response.
model1 <- tree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata, split = "gini"
)
plot(model1)
text(model1, all = TRUE, cex = 0.6)

# ---- party (conditional inference tree) ----
if (!requireNamespace("party", quietly = TRUE)) {
  install.packages("party")
}
library(party)
model2 <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata
)
plot(model2)

# ---- tree with a minimum of 10 observations per child node ----
# nobs is taken from the data rather than hard-coded.
model1 <- tree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata,
  control = tree.control(nobs = nrow(mydata), mincut = 10)
)
plot(model1)
text(model1, all = TRUE, cex = 0.6)
predict(model1, iris)

# ---- conditional inference tree limited to depth 2 ----
model2 <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata, controls = ctree_control(maxdepth = 2)
)
plot(model2)
DS - HYPOTHESIS
​
# One-sample, two-sided t-test of H0: mean(x) = 9, done by testing whether
# the shifted sample x - 9 has mean zero.
x <- c(
  6.2, 6.6, 7.1, 7.4, 7.6, 7.9, 8, 8.3, 8.4, 8.5, 8.6, 8.8,
  8.8, 9.1, 9.2, 9.4, 9.4, 9.7, 9.9, 10.2, 10.4, 10.8, 11.3, 11.9
)
t.test(x - 9, alternative = "two.sided", conf.level = 0.95)

# Two-sample, two-sided Welch t-test (unequal variances) of H0: equal means.
x <- c(
  418, 421, 421, 422, 425, 427, 431, 434, 437,
  439, 446, 447, 448, 453, 454, 463, 465
)
# The fifth value was entered as 36, which breaks the otherwise sorted
# sample and is an order of magnitude off; corrected to 436.
y <- c(429, 430, 430, 431, 436, 437, 440, 441, 445, 446, 447)
test2 <- t.test(
  x, y,
  alternative = "two.sided", mu = 0, var.equal = FALSE, conf.level = 0.95
)
test2
DS - LOGISTICS
​
# Logistic regression of iris Species (setosa vs. versicolor) on sepal
# length.
library(datasets)
ir_data <- iris
head(ir_data)
str(ir_data)
levels(ir_data$Species)
sum(is.na(ir_data))

# The first 100 rows hold only two species. Drop the now-empty third factor
# level so the response is a genuine two-level factor.
ir_data <- droplevels(ir_data[1:100, ])

# 80/20 split. NOTE: despite their names, `ir_test` is the 80-row sample the
# model is fitted on and `ir_ctrl` is the 20-row hold-out used for prediction.
set.seed(100)
samp <- sample(1:100, 80)
ir_test <- ir_data[samp, ]
ir_ctrl <- ir_data[-samp, ]

# Install plotting packages only when they are missing.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
if (!requireNamespace("GGally", quietly = TRUE)) {
  install.packages("GGally")
}
library(GGally)
ggpairs(ir_test)

# Fit the model; the first factor level is treated as "failure".
y <- ir_test$Species
x <- ir_test$Sepal.Length
glfit <- glm(y ~ x, family = "binomial")
summary(glfit)

# Predicted probabilities for the hold-out rows.
newdata <- data.frame(x = ir_ctrl$Sepal.Length)
predicted_val <- predict(glfit, newdata, type = "response")
prediction <- data.frame(ir_ctrl$Sepal.Length, ir_ctrl$Species, predicted_val)
prediction

# Rounded prediction (0/1) against sepal length, coloured by true species.
# ggplot() is used because qplot() is deprecated in current ggplot2.
ggplot(
  prediction,
  aes(
    x = ir_ctrl.Sepal.Length,
    y = round(predicted_val),
    colour = ir_ctrl.Species
  )
) +
  geom_point() +
  labs(
    x = "Sepal Length",
    y = "Prediction using Logistic Reg.",
    colour = "Species"
  )
DS - PCA
​
# Principal component analysis of the four numeric iris measurements,
# followed by a naive Bayes comparison: all features vs. the first component.
data_iris <- iris[1:4]
Cov_data <- cov(data_iris)

# Find out the eigenvectors and eigenvalues using the covariance matrix
Eigen_data <- eigen(Cov_data)

# Using the inbuilt function (covariance-based, so cor must be logical FALSE)
PCA_data <- princomp(data_iris, cor = FALSE)

# Let's now compare the output variances.
# NOTE: princomp() divides by n while cov() divides by n - 1, so the two
# sets of variances differ by the factor (n - 1) / n.
Eigen_data$values
PCA_data$sdev^2
PCA_data$loadings[, 1:4]
Eigen_data$vectors
summary(PCA_data)
biplot(PCA_data)
screeplot(PCA_data, type = "lines")

# Select the first principal component for the second model
model2 <- PCA_data$loadings[, 1]

# For the second model, we need to calculate scores by multiplying our
# loadings with the data
model2_scores <- as.matrix(data_iris) %*% model2

# Loading libraries for naiveBayes model
library(class)
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)

# Fitting the first model over the entire data
mod1 <- naiveBayes(iris[, 1:4], iris[, 5])

# Fitting the second model using the first principal component
mod2 <- naiveBayes(model2_scores, iris[, 5])

# Accuracy for the first model
table(predict(mod1, iris[, 1:4]), iris[, 5])

# Accuracy for the second model
table(predict(mod2, model2_scores), iris[, 5])
DS - TIME SERIES FORECASTING
​
# Exploration and seasonal ARIMA forecast of the monthly AirPassengers series.
data(AirPassengers)
class(AirPassengers)
start(AirPassengers)
end(AirPassengers)
frequency(AirPassengers)
summary(AirPassengers)

# Raw series with a fitted linear trend line.
plot(AirPassengers)
abline(reg = lm(AirPassengers ~ time(AirPassengers)))

# Month index of every observation, yearly means, and per-month spread.
cycle(AirPassengers)
plot(aggregate(AirPassengers, FUN = mean))
boxplot(AirPassengers ~ cycle(AirPassengers))

# Autocorrelation of the log series before and after first differencing.
acf(log(AirPassengers))
acf(diff(log(AirPassengers)))

# Seasonal ARIMA(0,1,1)(0,1,1)[12] on the log scale; the outer parentheses
# print the fit.
(fit <- arima(
  log(AirPassengers),
  order = c(0, 1, 1),
  seasonal = list(order = c(0, 1, 1), period = 12)
))

# Forecast ten years ahead and return to the original scale. exp() is the
# exact inverse of log(); the constant 2.718 is only an approximation of e,
# and its error grows with the size of the forecast.
pred <- predict(fit, n.ahead = 10 * 12)
forecast_level <- exp(pred$pred)
ts.plot(AirPassengers, forecast_level, log = "y", lty = c(1, 3))
DS - SIMPLE REGRESSION
​
# Simple linear regression of stopping distance on speed (cars data set).

# View() needs an interactive session with a data viewer; skip it otherwise.
if (interactive()) {
  View(cars)
}
head(cars)
scatter.smooth(x = cars$speed, y = cars$dist, main = "Dist ~ Speed")

# Side-by-side box plots. The outlier values are collapsed into one string
# so the subtitle stays a single label even when there are several.
old_par <- par(mfrow = c(1, 2))
boxplot(
  cars$speed,
  main = "Speed",
  sub = paste(
    "Outlier rows: ",
    paste(boxplot.stats(cars$speed)$out, collapse = ", ")
  )
)
boxplot(
  cars$dist,
  main = "Distance",
  sub = paste(
    "Outlier rows: ",
    paste(boxplot.stats(cars$dist)$out, collapse = ", ")
  )
)

# Side-by-side density plots annotated with the sample skewness.
library(e1071)
par(mfrow = c(1, 2))
plot(
  density(cars$speed),
  main = "Density Plot: Speed", ylab = "Frequency",
  sub = paste("Skewness:", round(e1071::skewness(cars$speed), 2))
)
polygon(density(cars$speed), col = "blue")
plot(
  density(cars$dist),
  main = "Density Plot: Distance", ylab = "Frequency",
  sub = paste("Skewness:", round(e1071::skewness(cars$dist), 2))
)
polygon(density(cars$dist), col = "red")

# Restore the plotting layout that was active before this script changed it.
par(old_par)

# Correlation and the fitted linear model.
cor(cars$speed, cars$dist)
linearMod <- lm(dist ~ speed, data = cars)
print(linearMod)
summary(linearMod)
MongoDB
​
After downloading MongoDB, go to its installation directory and run the file named "mongod" with administrator privileges. Then run "Mongo.exe". (I have already downloaded and installed it.)
​
1) If you want to use a database named mydb, the "use DATABASE" statement is as follows:
use mydb
​
To check your currently selected database, use the command
db
​
If you want to check your databases list, use the command show dbs.
show dbs
​
The database you created (mydb) is not present in the list. For a database to be displayed, you need to insert at least one document into it.
// insertOne() replaces the deprecated insert() helper for a single document.
db.movie.insertOne({"name":"Trojan AnuragK19"})
show dbs
3) Create collection in MongoDB.
db.createCollection("mycollection")
​
4) Inserting into collection.
// A single document is inserted with insertOne(); insert() is deprecated.
db.mycollection.insertOne({'name':'Anurag Kurmi','age':21})
​
Multiple values insertion
// Build the array of documents first, then insert them in one call.
// insertMany() replaces the deprecated insert() helper for arrays.
var cs = [
  {'name':'Anuj Kurmi','age':18},
  {'name':'Rahul Kumar','age':25},
  {'name':'John Nadar','age':30}
]
db.mycollection.insertMany(cs)
5) MongoDB Query Document
db.mycollection.find()
db.mycollection.find().pretty()
....(json format)
6) MongoDB update document
// updateOne() replaces the deprecated update() helper; like update() without
// the multi option, it changes only the first matching document.
db.mycollection.updateOne({'name':'Rahul Kumar'},{$set:{'name':'Amit Patel'}})
db.mycollection.find()
7) Delete Document in MongoDB
// deleteMany() replaces the deprecated remove() helper; like remove() without
// justOne, it deletes every document that matches the filter.
db.mycollection.deleteMany({'name':'Amit Patel'})
db.mycollection.find()
8) limit() and skip() method in MongoDB.
db.mycollection.find({},{"name":1,_id:0}).limit(1)
9) Sorting of documents in MongoDB.
db.mycollection.find({},{"age":1,_id:0}).sort({"age":-1})
10) MongoDB Indexing.
// createIndex() replaces ensureIndex(), which has been removed from current
// MongoDB releases. 1 requests an ascending index on "name".
db.mycollection.createIndex({"name":1})
11) MongoDB projection.
db.mycollection.find({},{"name":1,_id:0})
db.mycollection.find({},{"name":1,_id:0}).limit(1).skip(1)
12) Dropping a collection in MongoDB.
db.mycollection.drop()
2) Dropping a database in MongoDB.
db.dropDatabase()
That's it for this video. Thanks for watching; don't forget to subscribe and comment.