Heading 1

DS - ANOVA

# One-way ANOVA on three treatment groups with seven observations each.
y1 <- c(18.2, 20.1, 17.6, 16.8, 18.8, 19.7, 19.1)
y2 <- c(17.4, 18.7, 19.1, 16.4, 15.9, 18.4, 17.7)
y3 <- c(15.2, 18.8, 17.7, 16.5, 15.9, 17.1, 16.7)
y <- c(y1, y2, y3)

# Group sizes, and the group label belonging to each element of y.
n <- rep(7, 3)
n
group <- rep(1:3, n)
group

# Stem-and-leaf displays: one per group, then one for the pooled data.
# stem() is called for its printed output; tmp only suppresses the
# auto-printed return value of tapply().
tmp <- tapply(y, group, stem)
stem(y)

# Sum, mean, variance and count of a numeric vector.
tmpfn <- function(x) {
  c(sum = sum(x), mean = mean(x), var = var(x), n = length(x))
}
tapply(y, group, tmpfn)
tmpfn(y)

# Fit the one-way model. group is converted to a factor so lm() treats it
# as a categorical treatment rather than a numeric covariate.
data <- data.frame(y = y, group = factor(group))
fit <- lm(y ~ group, data)
anova(fit)

# Degrees of freedom for treatment and error, taken from the ANOVA table.
df <- anova(fit)[, "Df"]
names(df) <- c("trt", "err")
df

# Upper-tail critical F values at the 5% and 1% levels.
alpha <- c(0.05, 0.01)
qf(alpha, df["trt"], df["err"], lower.tail = FALSE)

# Residual sum of squares and the 95% confidence interval for the error
# variance. The error df comes from the fitted model instead of being
# hard-coded, so the interval stays correct if the data change.
sse <- anova(fit)["Residuals", "Sum Sq"]
sse
sigma2_ci <- sse / qchisq(c(0.025, 0.975), df[["err"]], lower.tail = FALSE)
sigma2_ci

DS - CLUSTERING

# k-means clustering ----
data("iris")
names(iris)

# Cluster on the four numeric measurements only (drop the Species label).
new_data <- subset(iris, select = c(-Species))
new_data
cl <- kmeans(new_data, 3)

# Elbow plot: total within-cluster sum of squares for k = 1..15.
# vapply() guarantees a numeric vector regardless of input.
data <- new_data
wss <- vapply(
  1:15,
  function(k) kmeans(data, k)$tot.withinss,
  numeric(1)
)
wss
plot(
  1:15, wss,
  type = "b", pch = 19, frame.plot = FALSE,
  xlab = "Number of cluster k",
  ylab = "Total within-cluster sum of squares"
)

# Install only when missing so re-running the script does not reinstall.
if (!requireNamespace("cluster", quietly = TRUE)) {
  install.packages("cluster")
}
library(cluster)
clusplot(
  new_data, cl$cluster,
  color = TRUE, shade = TRUE, labels = 2, lines = 0
)
# Cluster assignment of every observation (the element is `cluster`;
# kmeans objects have no `clusters` element).
cl$cluster

# Agglomerative (hierarchical) clustering ----
# Default (complete) linkage on petal length and width.
cluster <- hclust(dist(iris[, 3:4]))
plot(cluster)
clusterCut <- cutree(cluster, 3)
table(clusterCut, iris$Species)

if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2", dependencies = TRUE)
}
library(ggplot2)
# Large translucent points show the true species; the overlaid points are
# coloured by the cluster each observation was assigned to.
ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) +
  geom_point(alpha = 0.4, size = 3.5) +
  geom_point(col = clusterCut) +
  scale_color_manual(values = c("black", "red", "green"))

# Same analysis with average linkage. The tree is stored in `clusters`,
# which is the name the following calls read.
clusters <- hclust(dist(iris[, 3:4]), method = "average")
clusterCut1 <- cutree(clusters, 3)
table(clusterCut1, iris$Species)
plot(clusters)

ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) +
  geom_point(alpha = 0.4, size = 3.5) +
  geom_point(col = clusterCut1) +
  scale_color_manual(values = c("pink", "red", "yellow"))

DS - DECISION TREE

# Classification trees for iris Species with three different packages.
# Every model call receives `data = mydata`, so attach() is not needed
# (and would only pollute the search path).
mydata <- data.frame(iris)

# rpart: CART-style classification tree ----
if (!requireNamespace("rpart", quietly = TRUE)) {
  install.packages("rpart")
}
library(rpart)
model <- rpart(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata, method = "class"
)
plot(model)
text(model, use.n = TRUE, all = TRUE, cex = 0.8)

# tree: Gini-index splits ----
# tree() fits a classification tree whenever the response is a factor;
# `method = "class"` is an rpart argument and is not a tree() method, so
# it is omitted here.
if (!requireNamespace("tree", quietly = TRUE)) {
  install.packages("tree")
}
library(tree)
model1 <- tree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata, split = "gini"
)
plot(model1)
text(model1, all = TRUE, cex = 0.6)

# party: conditional inference tree ----
if (!requireNamespace("party", quietly = TRUE)) {
  install.packages("party")
}
library(party)
model2 <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata
)
plot(model2)

# tree again, now requiring at least 10 observations in each child node.
# nobs is derived from the data instead of being hard-coded to 150.
model1 <- tree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata,
  control = tree.control(nobs = nrow(mydata), mincut = 10)
)
plot(model1)
text(model1, all = TRUE, cex = 0.6)
predict(model1, iris)

# ctree again, limited to a depth of two levels.
model2 <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata, controls = ctree_control(maxdepth = 2)
)
plot(model2)

DS - HYPOTHESIS

# One-sample t-test ----
# Testing whether the mean of x differs from 9: shifting the data by 9 and
# testing against the default mu = 0 is the same hypothesis.
x <- c(
  6.2, 6.6, 7.1, 7.4, 7.6, 7.9, 8, 8.3, 8.4, 8.5, 8.6, 8.8,
  8.8, 9.1, 9.2, 9.4, 9.4, 9.7, 9.9, 10.2, 10.4, 10.8, 11.3, 11.9
)
t.test(x - 9, alternative = "two.sided", conf.level = 0.95)

# Two-sample (Welch) t-test ----
x <- c(
  418, 421, 421, 422, 425, 427, 431, 434, 437,
  439, 446, 447, 448, 453, 454, 463, 465
)
# The fifth value was entered as 36 in an otherwise sorted sample lying
# between 429 and 447; it is corrected to 436.
# NOTE(review): confirm against the original data source.
y <- c(429, 430, 430, 431, 436, 437, 440, 441, 445, 446, 447)
# var.equal = FALSE requests the Welch test (unequal variances).
test2 <- t.test(
  x, y,
  alternative = "two.sided", mu = 0,
  var.equal = FALSE, conf.level = 0.95
)
test2

DS - LOGISTIC REGRESSION

# Logistic regression of iris Species on Sepal.Length (two species only).
library(datasets)
ir_data <- iris
head(ir_data)
str(ir_data)
levels(ir_data$Species)
sum(is.na(ir_data))

# Keep the first 100 rows so the response has two classes, and drop the
# factor level that no longer occurs so summaries and plots show two.
ir_data <- droplevels(ir_data[1:100, ])

# Reproducible split: ir_test (80 rows) is used to FIT the model and
# ir_ctrl (the remaining 20 rows) is held out for prediction.
set.seed(100)
samp <- sample(1:100, 80)
ir_test <- ir_data[samp, ]
ir_ctrl <- ir_data[-samp, ]

# Install plotting packages only when they are missing.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
if (!requireNamespace("GGally", quietly = TRUE)) {
  install.packages("GGally")
}
library(GGally)
ggpairs(ir_test)

# Fit the model. With a factor response, glm() treats the first level as
# failure and the other level as success.
y <- ir_test$Species
x <- ir_test$Sepal.Length
glfit <- glm(y ~ x, family = "binomial")
summary(glfit)

# Predicted probabilities for the held-out rows; the column must be named
# x to match the predictor name used in the fit.
newdata <- data.frame(x = ir_ctrl$Sepal.Length)
predicted_val <- predict(glfit, newdata, type = "response")
prediction <- data.frame(ir_ctrl$Sepal.Length, ir_ctrl$Species, predicted_val)
prediction

# Predicted class (probability rounded to 0/1) against sepal length,
# coloured by the true species. ggplot() replaces the deprecated qplot().
ggplot(
  prediction,
  aes(
    x = ir_ctrl.Sepal.Length,
    y = round(predicted_val),
    colour = ir_ctrl.Species
  )
) +
  geom_point() +
  labs(
    x = "Sepal Length",
    y = "Prediction using Logistic Reg.",
    colour = "Species"
  )

DS - PCA

# PCA on the four iris measurements, by hand and with princomp().
data_iris <- iris[1:4]
Cov_data <- cov(data_iris)

# Eigenvectors and eigenvalues of the covariance matrix.
Eigen_data <- eigen(Cov_data)

# The built-in function. cor must be a logical; the previous string
# "False" only worked because R coerces it when it is tested.
PCA_data <- princomp(data_iris, cor = FALSE)

# Compare the component variances. princomp() uses divisor N for the
# covariance matrix while cov() uses N - 1, so the two sets of values
# differ by the factor (N - 1) / N.
Eigen_data$values
PCA_data$sdev^2

# Compare the loadings with the eigenvectors.
PCA_data$loadings[, 1:4]
Eigen_data$vectors

summary(PCA_data)
biplot(PCA_data)
screeplot(PCA_data, type = "lines")

# Select the first principal component for the second model.
model2 <- PCA_data$loadings[, 1]

# Scores for the second model: the data multiplied by the loadings.
model2_scores <- as.matrix(data_iris) %*% model2

# Libraries for the naiveBayes model (install e1071 only when missing).
library(class)
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)

# First model: all four measurements.
mod1 <- naiveBayes(iris[, 1:4], iris[, 5])

# Second model: the first principal component only.
mod2 <- naiveBayes(model2_scores, iris[, 5])

# Confusion table for the first model (predictions on the training data).
table(predict(mod1, iris[, 1:4]), iris[, 5])

# Confusion table for the second model.
table(predict(mod2, model2_scores), iris[, 5])

DS - TIME SERIES FORECASTING

# Exploration and seasonal ARIMA forecast of the AirPassengers series.
data(AirPassengers)
class(AirPassengers)
start(AirPassengers)
end(AirPassengers)
frequency(AirPassengers)
summary(AirPassengers)

# Raw series with a fitted linear trend line.
plot(AirPassengers)
abline(reg = lm(AirPassengers ~ time(AirPassengers)))

# Position of each observation within the yearly cycle.
cycle(AirPassengers)

# Yearly means (trend) and the distribution per month (seasonality).
plot(aggregate(AirPassengers, FUN = mean))
boxplot(AirPassengers ~ cycle(AirPassengers))

# Autocorrelation of the logged series, before and after differencing.
acf(log(AirPassengers))
acf(diff(log(AirPassengers)))

# Seasonal ARIMA(0,1,1)(0,1,1)[12] on the log scale; the outer
# parentheses print the fit.
(fit <- arima(
  log(AirPassengers),
  order = c(0, 1, 1),
  seasonal = list(order = c(0, 1, 1), period = 12)
))

# Forecast ten years ahead and transform back from the log scale with
# exp(); the previous 2.718^x is only an approximation of e^x.
pred <- predict(fit, n.ahead = 10 * 12)
forecast_passengers <- exp(pred$pred)
ts.plot(AirPassengers, forecast_passengers, log = "y", lty = c(1, 3))

DS - SIMPLE REGRESSION

# Simple linear regression of stopping distance on speed (cars data).

# View() opens a data viewer, so it is only called in interactive sessions.
if (interactive()) {
  View(cars)
}
head(cars)

# Scatter plot with a smoothed trend line.
scatter.smooth(x = cars$speed, y = cars$dist, main = "Dist ~ Speed")

# Two plots side by side; the previous settings are kept for restoring.
old_par <- par(mfrow = c(1, 2))

# Box plots with the outlying values in the subtitle. The values are
# collapsed into one string so several outliers still give one subtitle.
speed_out <- boxplot.stats(cars$speed)$out
dist_out <- boxplot.stats(cars$dist)$out
boxplot(
  cars$speed,
  main = "Speed",
  sub = paste("Outlier rows: ", paste(speed_out, collapse = " "))
)
boxplot(
  cars$dist,
  main = "Distance",
  sub = paste("Outlier rows: ", paste(dist_out, collapse = " "))
)

# Density plots annotated with the skewness of each variable.
library(e1071)
par(mfrow = c(1, 2))
plot(
  density(cars$speed),
  main = "Density Plot: Speed", ylab = "Frequency",
  sub = paste("Skewness:", round(e1071::skewness(cars$speed), 2))
)
polygon(density(cars$speed), col = "blue")
plot(
  density(cars$dist),
  main = "Density Plot: Distance", ylab = "Frequency",
  sub = paste("Skewness:", round(e1071::skewness(cars$dist), 2))
)
polygon(density(cars$dist), col = "red")

# Restore the plotting layout that was active before this script.
par(old_par)

# Correlation, then the fitted linear model and its summary.
cor(cars$speed, cars$dist)
linearMod <- lm(dist ~ speed, data = cars)
print(linearMod)
summary(linearMod)

MongoDB

After downloading, go to the installation directory of MongoDB and run the file named "mongod" with administrator privileges. Then run "mongo.exe". I have already downloaded and installed it.

1) If you want to use a database named mydb, the use DATABASE statement is as follows:

use mydb

To check your currently selected database, use the command db.

If you want to check your databases list, use the command show dbs.

show dbs

The database you created (mydb) is not present in the list yet. For it to be displayed, you need to insert at least one document into it.

// insertOne() adds a single document; it replaces the deprecated insert().
db.movie.insertOne({"name":"Trojan AnuragK19"})

show dbs

3) Create collection in MongoDB.

db.createCollection("mycollection")

4) Inserting into collection.

// Insert one document with insertOne() (insert() is deprecated).
db.mycollection.insertOne({'name':'Anurag Kurmi','age':21})

Multiple values insertion

// Build the array of documents first, then add them all in a single
// insertMany() call (insert() is deprecated).
var cs=[{'name':'Anuj Kurmi','age':18},

{'name':'Rahul Kumar','age':25},

{'name':'John Nadar','age':30}]

db.mycollection.insertMany(cs)

5) MongoDB Query Document

db.mycollection.find()

db.mycollection.find().pretty()

....(json format)

6) MongoDB update document

// updateOne() changes the first matching document, which is what the
// deprecated update() did by default.
db.mycollection.updateOne({'name':'Rahul Kumar'},{$set:{'name':'Amit Patel'}})

db.mycollection.find()

7) Delete Document in MongoDB

// deleteMany() removes every matching document, which is what the
// deprecated remove() did by default.
db.mycollection.deleteMany({'name':'Amit Patel'})

db.mycollection.find()

8) limit() and skip() method in MongoDB.

db.mycollection.find({},{"name":1,_id:0}).limit(1)

9) Sorting of documents in MongoDB.

db.mycollection.find({},{"age":1,_id:0}).sort({"age":-1})

10) MongoDB Indexing.

// createIndex() replaces ensureIndex(), which was deprecated and is no
// longer available in current MongoDB shells. 1 = ascending order.
db.mycollection.createIndex({"name":1})

11) MongoDB projection.

db.mycollection.find({},{"name":1,_id:0})

db.mycollection.find({},{"name":1,_id:0}).limit(1).skip(1)

12) Dropping a collection in MongoDB.

db.mycollection.drop()

13) Dropping a database in MongoDB.

db.dropDatabase()

That's it for this video. Thanks for watching; please subscribe and comment.

Heading 1

Registered Charity Number : 0123456789