R_handbook_统计分析
- 游戏开发
- 2025-07-21 18:58:02

# ==== Part I. Distribution fitting and goodness-of-fit tests ====
#
# NOTE: `data`, `linseed`, `wafer` and `tem_data` are not defined in this
# file; they must exist in the session before the matching section is run.
# Every distribution section below reads the first column of `data` as the
# sample to fit.

# ---- 1. The four d/p/q/r functions (shown for the normal distribution) ----
# dnorm: "density". Value of the normal probability density function f(x).
#        For a continuous variable this is a density, not the probability of
#        a single point. Evaluating it over a grid traces the normal curve.
# pnorm: "probability". The cumulative distribution function (CDF).
# qnorm: "quantile". The inverse of pnorm: pnorm maps a value to a cumulative
#        probability in [0, 1], qnorm maps a cumulative probability back to
#        a value.
# rnorm: "random". Draws a sample of normally distributed random numbers.

# ---- 2. Fitting and testing specific distributions ----

# ---- 2.1 Log-normal distribution ----
library(MASS)

lognormal <- c(data[, 1])

## Fit the log-normal model by maximum likelihood.
lognormal_distr <- fitdistr(lognormal, "lognormal")

## Parameter estimates, their standard errors, and the log-likelihood.
lognormal_distr$estimate
lognormal_distr$sd
lognormal_distr$loglik

## Histogram of the raw data with the fitted density drawn on top.
par(pin = c(5, 5))
h_lognormal <- hist(
  lognormal,
  ylim = c(0, 230),
  main = "Histogram of lognormal",
  xlab = "data"
)
xfit <- seq(min(lognormal), max(lognormal), length.out = 1001)
yfit <- dlnorm(
  xfit,
  meanlog = lognormal_distr$estimate[["meanlog"]],
  sdlog = lognormal_distr$estimate[["sdlog"]]
)
# Turn the density into expected counts per bin so that it matches the
# frequency histogram: density * bin width * sample size. The multiplier is
# the number of observations, not the number of grid points in xfit.
yfit <- yfit * diff(h_lognormal$breaks[1:2]) * length(lognormal)
lines(xfit, yfit, col = "blue", lwd = 2)

# K-S test
## There is no ready-made test for log-normality, so log-transform the data
## and test the result for normality instead.
lognormal_to_normal <- log(lognormal)
## Compare against the fitted normal; without mean/sd, ks.test() compares
## against the standard normal N(0, 1).
## Caveat: the parameters are estimated from the same data, so the reported
## p-value is only approximate (it tends to be too large).
lognormal_ks_test <- ks.test(
  lognormal_to_normal,
  "pnorm",
  mean = lognormal_distr$estimate[["meanlog"]],
  sd = lognormal_distr$estimate[["sdlog"]]
)
lognormal_ks_test

# A-D test (Anderson-Darling test for normality of the log-transformed data)
library(fBasics)
lognormal_ad_test <- adTest(lognormal_to_normal)
lognormal_ad_test

# Q-Q plot, built by hand
## Plotting positions (rank - 0.5) / n give the cumulative probability of
## each observation; qnorm() turns them into standard-normal quantiles.
lognormal_probs <-
  (rank(lognormal_to_normal) - 0.5) / length(lognormal_to_normal)
lognormal_theoretical <- qnorm(lognormal_probs)
plot(
  lognormal_theoretical,
  lognormal_to_normal,
  main = "Lognormal Q-Q plot",
  xlab = "Theoretical Quantiles",
  ylab = "Sample Quantiles"
)
# Reference line: intercept = sample mean, slope = sample sd.
abline(mean(lognormal_to_normal), sd(lognormal_to_normal), col = 2, lwd = 3)

# ---- 2.2 Gamma distribution ----
gamma_data <- as.array(data[, 1])

## Fit the gamma model by maximum likelihood.
gamma_distr <- fitdistr(gamma_data, "gamma")

## Parameter estimates, their standard errors, and the log-likelihood.
gamma_distr$estimate
gamma_distr$sd
gamma_distr$loglik

## Histogram of the raw data with the fitted density drawn on top.
par(pin = c(5, 5))
h_gamma <- hist(
  gamma_data,
  ylim = c(0, 230),
  main = "Histogram of Gamma",
  xlab = "data"
)
xfit <- seq(min(gamma_data), max(gamma_data), length.out = 1001)
yfit <- dgamma(
  xfit,
  shape = gamma_distr$estimate[["shape"]],
  rate = gamma_distr$estimate[["rate"]]
)
# density * bin width * sample size (see section 2.1).
yfit <- yfit * diff(h_gamma$breaks[1:2]) * length(gamma_data)
lines(xfit, yfit, col = "blue", lwd = 2)

# K-S test
## One-sample test against the fitted gamma CDF. The CDF is "pgamma";
## "gamma" would resolve to the Gamma function, which is not a CDF.
## The p-value is approximate because the parameters were estimated.
gamma_ks_test <- ks.test(
  gamma_data,
  "pgamma",
  shape = gamma_distr$estimate[["shape"]],
  rate = gamma_distr$estimate[["rate"]]
)
gamma_ks_test

# A-D test
## NOTE(review): fBasics::adTest() only tests for normality, and its second
## argument is a title, so the "pnorm" passed previously had no effect on the
## test. As written this tests the raw data for normality, not for a gamma
## fit. An A-D test against the fitted gamma needs a function that accepts an
## arbitrary CDF (for example goftest::ad.test) - confirm what was intended.
gamma_ad_test <- adTest(gamma_data)
gamma_ad_test

# Q-Q plot, built by hand (no ready-made gamma Q-Q plot is used here)
gamma_probs <- (rank(gamma_data) - 0.5) / length(gamma_data)
gamma_theoretical <- qgamma(
  gamma_probs,
  shape = gamma_distr$estimate[["shape"]],
  rate = gamma_distr$estimate[["rate"]]
)
plot(
  gamma_theoretical,
  gamma_data,
  main = "Gamma Q-Q plot",
  xlab = "Theoretical Quantiles",
  ylab = "Sample Quantiles"
)
abline(0, 1, col = 2, lwd = 3)

# ---- 2.3 Pareto distribution ----
library(actuar)
library(fitdistrplus)

pareto_data <- as.vector(as.array(data[, 1]))

## Fit the Pareto model; the method and starting values must be given.
pareto_distr <- fitdist(
  pareto_data,
  "pareto",
  method = "mle",
  start = list(shape = 0.1, scale = 0.1)
)

## Parameter estimates, their standard errors, and the log-likelihood.
pareto_distr$estimate
pareto_distr$sd
pareto_distr$loglik

## Histogram of the raw data with the fitted density drawn on top.
par(pin = c(5, 5))
h_pareto <- hist(
  pareto_data,
  ylim = c(0, 230),
  main = "Histogram of Pareto",
  xlab = "data"
)
xfit <- seq(min(pareto_data), max(pareto_data), length.out = 1001)
yfit <- dpareto(
  xfit,
  shape = pareto_distr$estimate[["shape"]],
  scale = pareto_distr$estimate[["scale"]]
)
# density * bin width * sample size (see section 2.1).
yfit <- yfit * diff(h_pareto$breaks[1:2]) * length(pareto_data)
lines(xfit, yfit, col = "blue", lwd = 2)

# K-S test
## One-sample test against the fitted Pareto CDF (actuar::ppareto).
## The p-value is approximate because the parameters were estimated.
pareto_ks_test <- ks.test(
  pareto_data,
  "ppareto",
  shape = pareto_distr$estimate[["shape"]],
  scale = pareto_distr$estimate[["scale"]]
)
pareto_ks_test

# A-D test
## NOTE(review): adTest() tests for normality only and its second argument is
## a title (see section 2.2); this is not a test of the Pareto fit.
pareto_ad_test <- adTest(pareto_data)
pareto_ad_test

# Q-Q plot, built by hand
## Theoretical quantiles must come from the Pareto quantile function
## (actuar::qpareto), not from qweibull().
pareto_probs <- (rank(pareto_data) - 0.5) / length(pareto_data)
pareto_theoretical <- qpareto(
  pareto_probs,
  shape = pareto_distr$estimate[["shape"]],
  scale = pareto_distr$estimate[["scale"]]
)
plot(
  pareto_theoretical,
  pareto_data,
  main = "Pareto Q-Q plot",
  xlab = "Theoretical Quantiles",
  ylab = "Sample Quantiles"
)
abline(0, 1, col = 2, lwd = 3)

# ---- 2.4 Weibull distribution ----
weibull_data <- as.array(data[, 1])

## Fit the Weibull model by maximum likelihood.
weibull_distr <- fitdistr(weibull_data, "weibull")

## Parameter estimates, their standard errors, and the log-likelihood.
weibull_distr$estimate
weibull_distr$sd
weibull_distr$loglik

## Histogram of the raw data with the fitted density drawn on top.
par(pin = c(5, 5))
h_weibull <- hist(
  weibull_data,
  ylim = c(0, 230),
  main = "Histogram of Weibull",
  xlab = "data"
)
xfit <- seq(min(weibull_data), max(weibull_data), length.out = 1001)
yfit <- dweibull(
  xfit,
  shape = weibull_distr$estimate[["shape"]],
  scale = weibull_distr$estimate[["scale"]]
)
# density * bin width * sample size (see section 2.1).
yfit <- yfit * diff(h_weibull$breaks[1:2]) * length(weibull_data)
lines(xfit, yfit, col = "blue", lwd = 2)

# K-S test
## One-sample test against the fitted Weibull CDF.
## The p-value is approximate because the parameters were estimated.
weibull_ks_test <- ks.test(
  weibull_data,
  "pweibull",
  shape = weibull_distr$estimate[["shape"]],
  scale = weibull_distr$estimate[["scale"]]
)
weibull_ks_test

# A-D test
## NOTE(review): adTest() tests for normality only and its second argument is
## a title (see section 2.2); this is not a test of the Weibull fit.
weibull_ad_test <- adTest(weibull_data)
weibull_ad_test

# Q-Q plot, built by hand
weibull_probs <- (rank(weibull_data) - 0.5) / length(weibull_data)
weibull_theoretical <- qweibull(
  weibull_probs,
  shape = weibull_distr$estimate[["shape"]],
  scale = weibull_distr$estimate[["scale"]]
)
plot(
  weibull_theoretical,
  weibull_data,
  main = "Weibull Q-Q plot",
  xlab = "Theoretical Quantiles",
  ylab = "Sample Quantiles"
)
abline(0, 1, col = 2, lwd = 3)

# ==== Part II. Analysis of variance and contingency tables ====

# ---- 1. Analysis of variance ----

# ---- 1.1 One-way ANOVA ----
# with() / data = are used instead of attach()/detach() so that nothing is
# left on the search path if a step fails half-way.
with(linseed, table(Plot))
result_mean <- with(linseed, aggregate(Yield, by = list(Plot), FUN = mean))
result_sd <- with(linseed, aggregate(Yield, by = list(Plot), FUN = sd))
fit <- aov(Yield ~ Plot, data = linseed)
summary(fit)
TukeyHSD(fit)

# ---- 1.2 Two-way ANOVA ----
# Cross-tabulate the two factors.
with(wafer, table(Furnace, Wafer_Type))
# Cell means.
result_mean <- with(
  wafer,
  aggregate(Thickness, by = list(Furnace, Wafer_Type), FUN = mean)
)
# Cell standard deviations (the name result_df is kept from the original).
result_df <- with(
  wafer,
  aggregate(Thickness, by = list(Furnace, Wafer_Type), FUN = sd)
)
# Two-way ANOVA with interaction.
# NOTE(review): the original formula used `Wafer.Type` while every other line
# uses `Wafer_Type`; `Wafer_Type` is assumed to be the real column name.
fit <- aov(Thickness ~ Furnace * Wafer_Type, data = wafer)
summary(fit)
# Tukey honest significant differences for every pair of groups.
TukeyHSD(fit)

# ---- 2. Contingency-table analysis ----
# Which test to use depends on the table:
# - Two-way unordered table (rows and columns both have two unordered
#   levels): Pearson chi-square test, Fisher's exact test.
# - One-way ordered table (typically the outcome is ordered and the
#   explanatory variable is not): Mann-Whitney U test, Kruskal-Wallis H test.
# - Rows and columns ordered and measuring the same attribute (e.g. two
#   raters each giving positive/negative): Kappa agreement test.
# - Paired tables: McNemar test, Bowker test.

# ---- 2.1 Pearson chi-square test ----
library(tibble)

df <- tibble(
  count = c(56, 283, 55, 360),
  Gender = c("Male", "Male", "Female", "Female"),
  Response = c("Mentioned", "Not Mentioned", "Mentioned", "Not Mentioned")
)
# Build the contingency table from the counts.
tbl <- xtabs(count ~ Response + Gender, df)
# For a 2 x 2 table chisq.test() applies Yates' continuity correction by
# default (correct = TRUE).
chisq.test(tbl)

# ---- 2.2 Kruskal-Wallis H test ----
library(knitr)
library(kableExtra) # also provides %>%

df <- tibble(
  Grade = rep(c("A", "B", "C", "D-F"), 3),
  count = c(8, 14, 15, 3, 15, 19, 4, 1, 13, 15, 7, 4),
  major = c(rep("Psychology", 4), rep("Biology", 4), rep("Other", 4))
)
xtabs(count ~ Grade + major, df) %>%
  kable("html", table.attr = "style='width:50%;'", align = "c") %>%
  kable_styling(position = "center")

# Major is unordered, grade is ordered, and there are more than two groups,
# so the Kruskal-Wallis H test applies. The test needs one row per student:
# expand the counts, code the grade as an ordered score (A = 1 ... D-F = 4),
# and compare the score distribution across majors. Testing the 12 cell
# counts against 12 one-element groups would not test anything.
grade_levels <- c("A", "B", "C", "D-F")
students <- df[rep(seq_len(nrow(df)), times = df$count), ]
students$grade_score <- as.integer(
  factor(students$Grade, levels = grade_levels)
)
kruskal.test(grade_score ~ major, data = students)

# ==== Part III. Correlation analysis ====

# ---- 1. Pearson correlation coefficient ----
# cor.test() has no `use` argument (that belongs to cor()); it already drops
# incomplete pairs itself, so the argument is simply omitted.
pearson_test <- cor.test(
  tem_data$age,
  tem_data$confidence,
  method = "pearson"
)
pearson_test
R_handbook_统计分析由讯客互联游戏开发栏目发布,感谢您对讯客互联的认可,以及对我们原创作品以及文章的青睐,非常欢迎各位朋友分享到个人网站或者朋友圈,但转载请说明文章出处“R_handbook_统计分析”
上一篇
uni-apptabbar组件
下一篇
ES应用_ES原理