

DS - ANOVA

 

# One-way ANOVA: three treatment groups of 7 observations each
y1 = c(18.2, 20.1, 17.6, 16.8, 18.8, 19.7, 19.1)
y2 = c(17.4, 18.7, 19.1, 16.4, 15.9, 18.4, 17.7)
y3 = c(15.2, 18.8, 17.7, 16.5, 15.9, 17.1, 16.7)
y = c(y1, y2, y3)
n = rep(7, 3)
n
group = rep(1:3, n)
group
# Stem-and-leaf displays, per group and pooled
tmp = tapply(y, group, stem)
stem(y)
# Per-group summary statistics
tmpfn = function(x) c(sum = sum(x), mean = mean(x), var = var(x), n = length(x))
tapply(y, group, tmpfn)
tmpfn(y)
# Fit the one-way ANOVA model and read off the table
data = data.frame(y = y, group = factor(group))
fit = lm(y ~ group, data)
anova(fit)
df = anova(fit)[, "Df"]
names(df) = c("trt", "err")
df
# Critical F values at the 5% and 1% levels
alpha = c(0.05, 0.01)
qf(alpha, df["trt"], df["err"], lower.tail = FALSE)
# 95% confidence interval for the error variance from the residual sum of squares
anova(fit)["Residuals", "Sum Sq"]
anova(fit)["Residuals", "Sum Sq"] / qchisq(c(0.025, 0.975), 18, lower.tail = FALSE)
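
As a quick cross-check (not part of the original listing), the same one-way ANOVA can be fit with aov() and the observed F statistic compared against the 5% critical value computed above. A minimal sketch, assuming y and group are defined as in the listing:

# Minimal sketch (assumes y and group from the listing above)
fit_aov <- aov(y ~ factor(group))
summary(fit_aov)                               # same table as anova(fit)
F_obs  <- summary(fit_aov)[[1]][1, "F value"]  # observed F statistic
F_crit <- qf(0.05, df1 = 2, df2 = 18, lower.tail = FALSE)
F_obs > F_crit                                 # TRUE -> reject H0 of equal group means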

DS - CLUSTERING

​

"k-means clustering" data("iris") names(iris) new_data<-subset(iris,select = c(-Species)) new_data cl=kmeans(new_data,3) data <- new_data wss <- sapply(1:15,function(k){kmeans(data,k)$tot.withinss}) wss plot(1:15,wss,type="b",pch=19,frame=FALSE,xlab="Number of cluster k",ylab = "Total within-cluster sum of squares") install.packages("cluster") library(cluster) clusplot(new_data , cl$cluster , color=TRUE , shade=TRUE , labels = 2 , line=0) cl$cluster cl$clusters "agglomarative clustering" cluster <- hclust(dist(iris[,3:4])) plot(cluster) clusterCut <- cutree(cluster,3) table(clusterCut,iris$Species) install.packages("ggplot2",dependencies = TRUE) library(ggplot2) ggplot(iris,aes(Petal.Length,Petal.Width,color=iris$Species))+geom_point(alpha=0.4,size=3.5)+geom+point(col=clusterCut)+scale_color_manual(values=c('black','red','green')) cluster <- hclust(dist(iris[,3:4]),method = 'average') clusterCut1 <- cutree(clusters,3) table(clusterCut1,iris$Species) plot(clusters) install.packages("ggplot2") library(ggplot2) ggplot2(iris,aes(Petal.Length,Petal.Width,color=iris$Species))+geom_point(alpha=0.4,size=3.5)+geom+point(col=clusterCut1)+scale_color_manual(values=c('pink','red','yellow'))

DS - DECISION TREE

​

mydata <- data.frame(iris)
attach(mydata)
# Classification tree with rpart
install.packages("rpart")
library(rpart)
model <- rpart(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data=mydata, method="class")
plot(model)
text(model, use.n=TRUE, all=TRUE, cex=0.8)
# Classification tree with tree(), splitting on the Gini index
install.packages("tree")
library(tree)
model1 <- tree(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data=mydata, method="class", split="gini")
plot(model1)
text(model1, all=TRUE, cex=0.6)
# Conditional inference tree with party
install.packages("party")
library(party)
model2 <- ctree(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data=mydata)
plot(model2)
# Re-fit the tree() model with a minimum leaf size of 10
library(tree)
mydata <- data.frame(iris)
attach(mydata)
model1 <- tree(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data=mydata, method="class", control = tree.control(nobs = 150, mincut = 10))
plot(model1)
text(model1, all=TRUE, cex=0.6)
predict(model1, iris)
# Re-fit the ctree() model with a maximum depth of 2
model2 <- ctree(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data = mydata, controls = ctree_control(maxdepth=2))
plot(model2)
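
The listing above predicts on the same rows the trees were grown on. A hold-out check is a common complement; the 70/30 split and seed below are assumptions, not from the original listing:

# Minimal sketch: evaluate an rpart tree on held-out rows
library(rpart)
set.seed(1)                                    # assumed seed
idx <- sample(nrow(iris), 0.7 * nrow(iris))    # assumed 70/30 train/test split
tree_fit <- rpart(Species ~ ., data = iris[idx, ], method = "class")
pred <- predict(tree_fit, iris[-idx, ], type = "class")
table(predicted = pred, actual = iris$Species[-idx])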

DS - HYPOTHESIS

​

# One-sample t test of H0: mu = 9 (two-sided, 95% confidence)
x = c(6.2, 6.6, 7.1, 7.4, 7.6, 7.9, 8, 8.3, 8.4, 8.5, 8.6, 8.8, 8.8, 9.1, 9.2, 9.4, 9.4, 9.7, 9.9, 10.2, 10.4, 10.8, 11.3, 11.9)
t.test(x - 9, alternative = "two.sided", conf.level = 0.95)
# Two-sample Welch t test of H0: mu_x = mu_y (unequal variances)
x = c(418, 421, 421, 422, 425, 427, 431, 434, 437, 439, 446, 447, 448, 453, 454, 463, 465)
y = c(429, 430, 430, 431, 436, 437, 440, 441, 445, 446, 447)
test2 <- t.test(x, y, alternative = "two.sided", mu = 0, var.equal = FALSE, conf.level = 0.95)
test2
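
The printed output of t.test() can also be read programmatically from the returned htest object. A minimal sketch, assuming test2 from the listing above:

test2$statistic        # t statistic
test2$p.value          # two-sided p-value of the Welch test
test2$conf.int         # 95% confidence interval for the difference in means
test2$p.value < 0.05   # TRUE -> reject H0 of equal means at the 5% level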

DS - LOGISTIC REGRESSION

​

library(datasets)
ir_data <- iris
head(ir_data)
str(ir_data)
levels(ir_data$Species)
sum(is.na(ir_data))
# Keep only setosa and versicolor (rows 1-100) for a two-class problem
ir_data <- ir_data[1:100, ]
set.seed(100)
# 80 rows for fitting, the remaining 20 held back for checking
samp <- sample(1:100, 80)
ir_test <- ir_data[samp, ]
ir_ctrl <- ir_data[-samp, ]
install.packages("ggplot2")
library(ggplot2)
install.packages("GGally")
library(GGally)
ggpairs(ir_test)
# Logistic regression of species on sepal length
y <- ir_test$Species; x <- ir_test$Sepal.Length
glfit <- glm(y ~ x, family = 'binomial')
summary(glfit)
# Predicted probabilities on the held-back rows
newdata <- data.frame(x = ir_ctrl$Sepal.Length)
predicted_val <- predict(glfit, newdata, type = "response")
prediction <- data.frame(ir_ctrl$Sepal.Length, ir_ctrl$Species, predicted_val)
prediction
qplot(prediction[,1], round(prediction[,3]), col = prediction[,2], xlab = 'Sepal Length', ylab = 'Prediction using Logistic Reg.')
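
To turn the fitted probabilities into class labels, a common choice (assumed here, not in the original listing) is a 0.5 cutoff; since glm() treats the first factor level (setosa) as 0, the fitted probability is that of versicolor. A minimal sketch, assuming predicted_val and ir_ctrl from the listing:

# Minimal sketch: classify the held-back rows at an assumed 0.5 cutoff
pred_class <- ifelse(predicted_val > 0.5, "versicolor", "setosa")
table(predicted = pred_class, actual = ir_ctrl$Species)
mean(pred_class == ir_ctrl$Species)   # proportion classified correctly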

DS - PCA

​

data_iris <- iris[1:4]
Cov_data <- cov(data_iris)
# Find the eigenvectors and eigenvalues of the covariance matrix
Eigen_data <- eigen(Cov_data)
# Using the built-in PCA function
PCA_data <- princomp(data_iris, cor = FALSE)
# Compare the variances from the two approaches
Eigen_data$values
PCA_data$sdev^2
PCA_data$loadings[, 1:4]
Eigen_data$vectors
summary(PCA_data)
biplot(PCA_data)
screeplot(PCA_data, type = "lines")
# Select the first principal component for the second model
model2 = PCA_data$loadings[, 1]
# For the second model, calculate scores by multiplying the loadings with the data
model2_scores <- as.matrix(data_iris) %*% model2
# Load libraries for the naiveBayes models
library(class)
install.packages("e1071")
library(e1071)
# Fit the first model on all four original features
mod1 <- naiveBayes(iris[, 1:4], iris[, 5])
# Fit the second model on the first principal component only
mod2 <- naiveBayes(model2_scores, iris[, 5])
# Confusion table (accuracy) for the first model
table(predict(mod1, iris[, 1:4]), iris[, 5])
# Confusion table (accuracy) for the second model
table(predict(mod2, model2_scores), iris[, 5])
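
The share of total variance carried by each component follows directly from the eigenvalues (or, equivalently, from the princomp standard deviations). A minimal sketch, assuming Eigen_data and PCA_data from the listing above:

Eigen_data$values / sum(Eigen_data$values)           # proportion of variance per component
cumsum(Eigen_data$values) / sum(Eigen_data$values)   # cumulative proportion
PCA_data$sdev^2 / sum(PCA_data$sdev^2)               # same proportions via princomp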

DS - TIME SERIES FORECASTING

​

data(AirPassengers)
class(AirPassengers)
start(AirPassengers)
end(AirPassengers)
frequency(AirPassengers)
summary(AirPassengers)
# Plot the series with a fitted trend line
plot(AirPassengers)
abline(reg = lm(AirPassengers ~ time(AirPassengers)))
cycle(AirPassengers)
# Yearly means and month-by-month spread
plot(aggregate(AirPassengers, FUN = mean))
boxplot(AirPassengers ~ cycle(AirPassengers))
# Autocorrelation before and after differencing the log series
acf(log(AirPassengers))
acf(diff(log(AirPassengers)))
# Seasonal ARIMA(0,1,1)(0,1,1)[12] on the log series
(fit <- arima(log(AirPassengers), c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 12)))
# Forecast 10 years ahead and plot on the original scale
pred <- predict(fit, n.ahead = 10*12)
ts.plot(AirPassengers, 2.718^pred$pred, log = "y", lty = c(1, 3))
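
The listing back-transforms the log-scale forecast with 2.718^x; exp() is the exact inverse of log(). A minimal sketch, assuming pred from the listing above:

forecast <- exp(pred$pred)             # forecast on the original passenger scale
window(forecast, start = c(1970, 1))   # the final 12 forecast months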

DS - SIMPLE REGRESSION

​

View(cars)
head(cars)
# Scatter plot with a smoothed trend
scatter.smooth(x=cars$speed, y=cars$dist, main="Dist ~ Speed")
# Box plots to spot outliers
par(mfrow=c(1, 2))
boxplot(cars$speed, main="Speed", sub=paste("Outlier rows: ", boxplot.stats(cars$speed)$out))
boxplot(cars$dist, main="Distance", sub=paste("Outlier rows: ", boxplot.stats(cars$dist)$out))
# Density plots with skewness
library(e1071)
par(mfrow=c(1, 2))
plot(density(cars$speed), main="Density Plot: Speed", ylab="Frequency", sub=paste("Skewness:", round(e1071::skewness(cars$speed), 2)))
polygon(density(cars$speed), col="blue")
plot(density(cars$dist), main="Density Plot: Distance", ylab="Frequency", sub=paste("Skewness:", round(e1071::skewness(cars$dist), 2)))
polygon(density(cars$dist), col="red")
# Correlation and the fitted simple linear regression
cor(cars$speed, cars$dist)
linearMod <- lm(dist ~ speed, data=cars)
print(linearMod)
summary(linearMod)
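
With the fitted line in hand, predict() gives stopping-distance estimates for new speeds. A minimal sketch, assuming linearMod from the listing; the speeds below are arbitrary example values:

new_speeds <- data.frame(speed = c(10, 15, 20))   # assumed example speeds
predict(linearMod, newdata = new_speeds, interval = "prediction")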

MongoDB

​

After downloading, go to the MongoDB installation directory and run the file named "mongod" with administrator privileges, then run "Mongo.exe". I have already downloaded and installed it.

​

1) If you want to use a database named mydb, the use DATABASE statement is as follows:

use mydb

​

To check your currently selected database, use the command

db

​

If you want to check your list of databases, use the command show dbs.

show dbs

​

Your newly created database (mydb) is not yet present in the list. To display a database, you need to insert at least one document into it.

db.movie.insert({"name":"Trojan AnuragK19"})

show dbs

 

3) Creating a collection in MongoDB.

db.createCollection("mycollection")

​

4) Inserting into a collection.

db.mycollection.insert([{'name':'Anurag Kurmi','age':21}])

​

Inserting multiple documents:

var cs=[{'name':'Anuj Kurmi','age':18},

{'name':'Rahul Kumar','age':25},

{'name':'John Nadar','age':30}]

db.mycollection.insert(cs)

 

5) MongoDB Query Document

db.mycollection.find()

db.mycollection.find().pretty()

(pretty() displays the documents in formatted JSON)

 

6) MongoDB update document

db.mycollection.update({'name':'Rahul Kumar'},{$set:{'name':'Amit Patel'}})

db.mycollection.find()

 

7) Delete Document in MongoDB

db.mycollection.remove({'name':'Amit Patel'})

db.mycollection.find()

 

8) The limit() and skip() methods in MongoDB.

db.mycollection.find({},{"name":1,_id:0}).limit(1)

 

9) Sorting of documents in MongoDB.

db.mycollection.find({},{"age":1,_id:0}).sort({"age":-1})

 

10) MongoDB Indexing.

db.mycollection.ensureIndex({"name":1})

 

11) MongoDB projection.

db.mycollection.find({},{"name":1,_id:0})

db.mycollection.find({},{"name":1,_id:0}).limit(1).skip(1)

 

12) Dropping a collection in MongoDB.

db.mycollection.drop()

 

13) Dropping a database in MongoDB.

db.dropDatabase()

 

 

That's it for this video. Thanks for watching; do subscribe and comment.
