Finally, today I decided to combine ggplot2 and ggrastr to generate a beautiful Manhattan plot. Using ggrastr, we can rasterize plots but keep lines, text, and labels in vector format (link).
Launch R.
R
Load packages.
library(data.table) library(ggplot2) library(ggrastr) library(patchwork)Load EWAS result (consist of chr, pos, and p-value).
df=as.data.frame(fread("EWAS.res",header=T),stringsAsFactors=F) head(df,4) # chr pos pval # 1 12345 0.9882 # 1 12349 0.6454 # 1 12368 0.8322 # 1 12552 0.2339 df$chr=factor(df$chr,levels=as.character(1:22))Calculate cummulative value of position. This will be used as the position on X-axis on Manhattan plot.
df$pos2=df$pos for(chr in 2:22){ tmp=chr-1 tmp2=max(as.numeric(df[df$chr==tmp,"pos2"]),na.rm=T) df[df$chr==chr,"pos2"]=df[df$chr==chr,"pos"]+tmp2 }Calculate center positions of each chromosome on X-axis. This will be used to set the positions of chromosome numbers.
df2=data.frame(chr=1:22,center=NA,stringsAsFactors=F) for(chr in 1:22){ start=min(df[df$chr==chr,"pos2"],na.rm=T) end=max(df[df$chr==chr,"pos2"],na.rm=T) center=mean(c(start,end)) df2[df2$chr==chr,"center"]=center }Make a repeating pattern of two colors. My favorite is:
colour1="#2e4057" colour2="#8c8c8c"Get Bonferroni-corrected threshold.
bonf=0.05/nrow(df)Ready to plot.
g=ggplot(df,aes(x=pos2,y=-log10(pval),colour=as.character(chr)))+geom_hline(yintercept=-log10(bonf),linetype="dashed")+geom_point_rast(size=0.5)+scale_colour_manual(values=c("1"=colour1,"2"=colour2,"3"=colour1,"4"=colour2,"5"=colour1,"6"=colour2,"7"=colour1,"8"=colour2,"9"=colour1,"10"=colour2,"11"=colour1,"12"=colour2,"13"=colour1,"14"=colour2,"15"=colour1,"16"=colour2,"17"=colour1,"18"=colour2,"19"=colour1,"20"=colour2,"21"=colour1,"22"=colour2))+theme_classic()+theme(legend.position="NONE")+scale_x_continuous(breaks=df2$center,labels=c(1:15,"",17,"",19,"",21,""))+xlab("Chromosome")+ylab(expression(paste(-log[10],"(",italic(P), " value)")))+geom_hline(yintercept=-log10(bonf),linetype="dashed") ggsave("mahnattan.pdf",g,width=6,height=3)Points are generated in raster format while lines and texts are in vector format.