moved files to /home/groups/CEDAR/mulqueen/oroak_organoid
so search replace
/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab
with
/home/groups/CEDAR/mulqueen/oroak_organoid
I ran multiple sequecing runs for the sciATAC. For now I am just processing the most recent, but I will loop back to the original Pitstop2 experiments.
#First Prep
/home/groups/oroaklab/seq/hiseq/180630_AML_Pitstop
#Second prep
/home/groups/oroaklab/seq/madbum/200721_NS500556_0411_AHCM3CAFX2
/home/groups/oroaklab/seq/madbum/200804_NS500556_0413_AHCMMJBGXF
#RNA prep
/home/groups/oroaklab/seq/madbum/191113_NS500556_0361_AHTVFLAFXY
/home/groups/oroaklab/seq/madbum/191118_NS500556_0362_AHVYV7AFXY
/home/groups/oroaklab/seq/madbum/191119_NS500556_0363_AHTVL7AFXY
Includes barcode assignment, fastq splitting, alignment, removal of duplicate reads, calling peaks and looking at TSS enrichment.
#200722 Organoid Processing
NextSeq2fastq -R 200721_NS500556_0411_AHCM3CAFX2
NextSeq2fastq -R 200804_NS500556_0413_AHCMMJBGXF
outdir="/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis"
mkdir $outdir
scitools fastq-dump -R 200721_NS500556_0411_AHCM3CAFX2 -O $outdir
scitools fastq-dump -R 200804_NS500556_0413_AHCMMJBGXF -O $outdir
#quick annotation generation for initial fastq splitting from other libraries sequenced on the same run
scitools make-annot orgo+NEX,CB=ALL+NEX,BC=ALL+NEX,CA=ALL+NEX,BB=ALL+NEX,AA=ALL+NEX,BA=ALL\
+PCR,CB=ALL+PCR,CA=ALL+PCR,CC=ALL+PCR,CD=ALL,+PCR,CE=ALL,+PCR,CF=ALL,+PCR,AD=ALL,+PCR,AE=ALL,+PCR,AF=ALL,+PCR,AG=ALL > base_orgo.annot
scitools split-fastq -X -A base_orgo.annot 200721_NS500556_0411_AHCM3CAFX2.1.fq.gz 200721_NS500556_0411_AHCM3CAFX2.2.fq.gz
scitools split-fastq -X -A base_orgo.annot 200804_NS500556_0413_AHCMMJBGXF.1.fq.gz 200804_NS500556_0413_AHCMMJBGXF.2.fq.gz
#move all relevant files to /home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis
#Align with a wrapper for bwa mem ###RUNNING
scitools fastq-align -t 10 -r 10 hg38 orgo_prep2_1 200721_NS500556_0411_AHCM3CAFX2.1.fq.gz 200721_NS500556_0411_AHCM3CAFX2.2.fq.gz &
scitools fastq-align -t 10 -r 10 hg38 orgo_prep2_2 200804_NS500556_0413_AHCMMJBGXF.orgo.1.fq.gz 200804_NS500556_0413_AHCMMJBGXF.orgo.2.fq.gz
scitools fastq-align -t 20 -r 20 hg38 orgo_prep1_1 180630.RM.1.fq.gz 180630.RM.1.fq.gz &
#Barcode based remove duplicates, barcodes are contained in the read name
scitools bam-rmdup -t 10 orgo_prep2_1.bam &
scitools bam-rmdup -t 10 orgo_prep2_2.bam &
scitools bam-rmdup -t 10 orgo_prep1_1.bam &
#Filter based on barcodes with >1000 unique reads
for i in orgo_prep1_1.bbrd.q10.bam orgo_prep2_1.bbrd.q10.bam orgo_prep2_2.bbrd.q10.bam; do scitools bam-filter -N 1000 $i ; done &
#merge bam files
scitools bam-merge orgo.bam orgo_prep1_1.bbrd.q10.filt.bam orgo_prep2_1.bbrd.q10.filt.bam orgo_prep2_2.bbrd.q10.filt.bam
#Look at tss enrichment to ensure libraries are of good quality
module load bedops/2.4.36
scitools bam-tssenrich -X -E orgo.ID.bam hg38 & #bulk ENCODE method
scitools bam-tssenrich -X orgo.ID.bam hg38 & #single cell method
#Did a fresh install of macs2 for py3 environment
#pip install macs2 #for python3 macs2
scitools callpeaks orgo.bam &
#Count of previously annotated human fetal corticogenesis peaks
bedtools intersect -a orgo.500.bed -b /home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/Song_2020/celltype_peaks.bed -wa -u | wc -l
#Modifying bam file to include prep number in cellID field (to maintain single cell identity through index collisions)
((samtools view -H orgo.bam)&(samtools view orgo.bam |awk 'OFS="\t" {split($1,a,":");split(a[3],b,"="); $1=a[1]"_"b[2]":"a[2]":"a[3]; print $0}')) | samtools view -bS - > orgo.ID.bam &
#Generating sparse matrix format counts matrix
scitools atac-counts orgo.ID.bam orgo.500.bed &
#generated organoid annotation file from google sheet https://docs.google.com/spreadsheets/d/1k93smqwxmYVUMLqq9SG8UjgjFeTinERzOflWUMRN8n8/edit#gid=1394545516
#wrote out with nano into tsv format
R #Using R 4.0
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
library(reshape2)
####read in files
first_prep_annot_path="/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis/first_prep_annot"
second_prep_annot_path="/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis/second_prep_annot"
first_prep_tn5_plates<-list.files(path=first_prep_annot_path,pattern="^tn5_plate_...tsv")
second_prep_tn5_plates<-list.files(path=second_prep_annot_path,pattern="^tn5_plate_...tsv")
idx<-read.table("/home/groups/oroaklab/src/scitools/scitools-dev/SCI_Indexes.txt",col.names=c("idx_name","idx_cycles","idx_seq"))
first_prep_orgid<-read.table(paste0(first_prep_annot_path,"/","tn5_plate_orgid.tsv"),header=T,sep="\t",col.names=c("orgID","differentiation_exp","cell_line","DIV","tag_wells","perc_tag_wells","est_cells_per_pcr_plate","cells_counted","sort_gate","freezing_protocol","treatment","organoid"))
second_prep_orgid<-read.table(paste0(second_prep_annot_path,"/","tn5_plate_orgid.tsv"),header=T,sep="\t",col.names=c("orgID","differentiation_exp","cell_line","DIV","tag_wells","perc_tag_wells","est_cells_per_pcr_plate","est_cells_per5_pcr_plate","est_cells_per10_pcr_plate"))
compl_1<-read.table("./source_fastq/preprocessing_files/orgo_prep1_1.complexity.txt",row.names=1,col.names=c("row.names","cellID","total_reads","uniq_reads","perc_uniq"))
compl_2<-read.table("./source_fastq/preprocessing_files/orgo_prep2_1.complexity.txt",row.names=1,col.names=c("row.names","cellID","total_reads","uniq_reads","perc_uniq"))
compl_3<-read.table("./source_fastq/preprocessing_files/orgo_prep2_2.complexity.txt",row.names=1,col.names=c("row.names","cellID","total_reads","uniq_reads","perc_uniq"))
#split index names to columns
idx$idx_plateloc<-unlist(lapply(as.character(idx$idx_name),FUN=function(x) strsplit(x,"_")[[1]][4]))
idx$idx_moleculesrc<-unlist(lapply(as.character(idx$idx_name),FUN=function(x) strsplit(x,"_")[[1]][1]))
idx$idx_platename<-unlist(lapply(as.character(idx$idx_name),FUN=function(x) strsplit(x,"_")[[1]][2]))
idx$idx_moleculeloc<-unlist(lapply(as.character(idx$idx_name),FUN=function(x) strsplit(x,"_")[[1]][3]))
#subset index sequences by cycle
idx_tn5i5<-idx[idx$idx_moleculeloc=="i5" & idx$idx_moleculesrc=="Tn5",]
idx_tn5i7<-idx[idx$idx_moleculeloc=="i7" & idx$idx_moleculesrc=="Tn5",]
colnames(idx_tn5i5)<-c("tn5_i5_idx_name","idx_cycles","tn5_i5_idx_seq","row","idx_moleculesrc","tn5_i5","idx_moleculeloc")
colnames(idx_tn5i7)<-c("tn5_i7_idx_name","idx_cycles","tn5_i7_idx_seq","column","idx_moleculesrc","tn5_i7","idx_moleculeloc")
idx_tn5i5<-idx_tn5i5[,c(1,3,4,6)]
idx_tn5i7<-idx_tn5i7[,c(1,3,4,6)]
#generate cellID list and split cell id by cycle
compl_1$cellID<-paste0(compl_1$cellID,"_1")
compl_2$cellID<-paste0(compl_2$cellID,"_2")
compl_3$cellID<-paste0(compl_3$cellID,"_3")
cellid<-rbind(compl_1,compl_2,compl_3)
cellid<-as.data.frame(cellid$cellID)
colnames(cellid)<-"cellID"
cellid$tn5_i7_idx_seq<-substr(cellid$cellID,1,8)
cellid$pcr_i7_idx_seq<-substr(cellid$cellID,9,18)
cellid$tn5_i5_idx_seq<-substr(cellid$cellID,19,26)
cellid$pcr_i5_idx_seq<-substr(cellid$cellID,27,36)
cellid$prep<-substr(cellid$cellID,38,38)
#generate long format organoid tn5 data (for second plate)
dat<-data.frame()
for (i in second_prep_tn5_plates) {
tn5_plate<-strsplit(strsplit(i,"_")[[1]][-1],"[.]")[[2]][1]
tmp<-read.table(paste0(second_prep_annot_path,"/",i),header=T,row.names=1)
tmp$row<-row.names(tmp)
tmp<-melt(tmp)
tmp$tn5_plate<-tn5_plate
ifelse(nrow(dat)==0,dat<-tmp,dat<-rbind(dat,tmp))
}
colnames(dat)<-c("row","column","orgID","tn5_plate")
dat$tn5_plate<-toupper(dat$tn5_plate)
dat$tn5_i5<-substr(dat$tn5_plate,1,1)
dat$tn5_i7<-substr(dat$tn5_plate,2,2)
dat$column<-substr(dat$column,2,5)
#merge organoid tn5 data with tn5 i5 index
dat<-merge(dat,idx_tn5i5,by=c("tn5_i5","row"),all.x=T)
#merge organoid tn5 data with tn5 i7 index
dat<-merge(dat,idx_tn5i7,by=c("tn5_i7","column"),all.x=T)
#merge organoid data with organoid key
dat<-merge(dat,second_prep_orgid,by="orgID")
#merge organoid data with complexity file
compl<-rbind(compl_2,compl_3)
compl<-merge(compl,cellid[cellid$prep!=1,],by="cellID")
dat<-merge(dat,compl,by=c("tn5_i5_idx_seq","tn5_i7_idx_seq"))
#apply read filter used on each bam
dat<-dat[dat$uniq_reads>=1000,]
write.table(dat,file="second_prep_summary_statistics_per_cell.tsv",col.names=T,row.names=F,quote=F,sep="\t")
tn5plate_annot<-dat[,c("cellID","tn5_plate")]
orgID_annot<-dat[,c("cellID","orgID")]
diffexp_annot<-dat[,c("cellID","differentiation_exp")]
cellline_annot<-dat[,c("cellID","cell_line")]
div_annot<-dat[,c("cellID","DIV")]
write.table(tn5plate_annot,file=paste0(second_prep_annot_path,"/","second_prep_tn5plate.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(orgID_annot,file=paste0(second_prep_annot_path,"/","second_prep_orgID.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(diffexp_annot,file=paste0(second_prep_annot_path,"/","second_prep_diffexp.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(cellline_annot,file=paste0(second_prep_annot_path,"/","second_prep_cellline.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(div_annot,file=paste0(second_prep_annot_path,"/","second_prep_div.annot"),sep="\t",col.names=F,row.names=F,quote=F)
#generate long format organoid tn5 data (for first plate)
#merge organoid data with complexity file
compl<-compl_1
compl<-merge(compl,cellid[cellid$prep==1,],by="cellID")
dat<-data.frame()
for (i in first_prep_tn5_plates) {
tn5_plate<-strsplit(strsplit(i,"_")[[1]][-1],"[.]")[[2]][1]
tmp<-read.table(paste0(first_prep_annot_path,"/",i),header=T,row.names=1)
colnames(tmp)<-paste0("X",seq(1,12))
tmp$row<-row.names(tmp)
tmp<-melt(tmp,id.vars="row")
tmp$tn5_plate<-tn5_plate
ifelse(nrow(dat)==0,dat<-tmp,dat<-rbind(dat,tmp))
}
colnames(dat)<-c("row","column","orgID","tn5_plate")
dat$tn5_plate<-toupper(dat$tn5_plate)
dat$tn5_i5<-substr(dat$tn5_plate,1,1)
dat$tn5_i7<-substr(dat$tn5_plate,2,2)
dat$column<-substr(dat$column,2,5)
#merge organoid tn5 data with tn5 i5 index
dat<-merge(dat,idx_tn5i5,by=c("tn5_i5","row"),all.x=T)
#merge organoid tn5 data with tn5 i7 index
dat<-merge(dat,idx_tn5i7,by=c("tn5_i7","column"),all.x=T)
#merge organoid data with organoid key
dat<-merge(dat,first_prep_orgid,by="orgID")
dat<-merge(dat,compl,by=c("tn5_i5_idx_seq","tn5_i7_idx_seq"))
#apply read filter used on each bam
dat<-dat[dat$uniq_reads>=1000,]
write.table(dat,file="first_prep_summary_statistics_per_cell.tsv",col.names=T,row.names=F,quote=F,sep="\t")
tn5plate_annot<-dat[,c("cellID","tn5_plate")]
orgID_annot<-dat[,c("cellID","orgID")]
diffexp_annot<-dat[,c("cellID","differentiation_exp")]
cellline_annot<-dat[,c("cellID","cell_line")]
div_annot<-dat[,c("cellID","DIV")]
sortgate_annot<-dat[,c("cellID","sort_gate")]
freezing_annot<-dat[,c("cellID","freezing_protocol")]
treatment_annot<-dat[,c("cellID","treatment")]
organoid_annot<-dat[,c("cellID","organoid")]
write.table(tn5plate_annot,file=paste0(first_prep_annot_path,"/","first_prep_tn5plate.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(orgID_annot,file=paste0(first_prep_annot_path,"/","first_prep_orgID.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(diffexp_annot,file=paste0(first_prep_annot_path,"/","first_prep_diffexp.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(cellline_annot,file=paste0(first_prep_annot_path,"/","first_prep_cellline.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(div_annot,file=paste0(first_prep_annot_path,"/","first_prep_div.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(sortgate_annot,file=paste0(first_prep_annot_path,"/","first_prep_sortgate.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(freezing_annot,file=paste0(first_prep_annot_path,"/","first_prep_freezeprotocol.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(treatment_annot,file=paste0(first_prep_annot_path,"/","first_prep_treatment.annot"),sep="\t",col.names=F,row.names=F,quote=F)
write.table(organoid_annot,file=paste0(first_prep_annot_path,"/","first_prep_organoid.annot"),sep="\t",col.names=F,row.names=F,quote=F)
Tabix file format is a tab separated multicolumn data structure.
Column Number | Name | Description |
---|---|---|
1 | chrom | Reference genome chromosome of fragment |
2 | chromStart | Adjusted start position of fragment on chromosome. |
3 | chromEnd | Adjusted end position of fragment on chromosome. The end position is exclusive, so represents the position immediately following the fragment interval. |
4 | barcode | The 10x (or sci) cell barcode of this fragment. This corresponds to the CB tag attached to the corresponding BAM file records for this fragment. |
5 | duplicateCount | The number of PCR duplicate read pairs observed for this fragment. Sequencer-created duplicates, such as Exclusion Amp duplicates created by the NovaSeq instrument are excluded from this count. |
input_bam="orgo.ID.bam"
output_name="orgo"
tabix="/home/groups/oroaklab/src/cellranger-atac/cellranger-atac-1.1.0/miniconda-atac-cs/4.3.21-miniconda-atac-cs-c10/bin/tabix"
bgzip="/home/groups/oroaklab/src/cellranger-atac/cellranger-atac-1.1.0/miniconda-atac-cs/4.3.21-miniconda-atac-cs-c10/bin/bgzip"
samtools view --threads 10 $input_bam | awk 'OFS="\t" {split($1,a,":"); print $3,$4,$8,a[1],1}' | sort -S 2G -T . --parallel=30 -k1,1 -k2,2n -k3,3n | $bgzip > $output_name.fragments.tsv.gz
$tabix -p bed $output_name.fragments.tsv.gz &
Using R v4.0 and Signac v1.0 for processing.
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
read_in_sparse<-function(x){ #x is character file prefix followed by .bbrd.q10.500.counts.sparseMatrix.values.gz
IN<-as.matrix(read.table(paste0(x,".counts.sparseMatrix.values.gz")))
IN<-sparseMatrix(i=IN[,1],j=IN[,2],x=IN[,3])
COLS<-read.table(paste0(x,".counts.sparseMatrix.cols.gz"))
colnames(IN)<-COLS$V1
ROWS<-read.table(paste0(x,".counts.sparseMatrix.rows.gz"))
row.names(IN)<-ROWS$V1
writeMM(IN,file=paste0(x,".counts.mtx")) #this is to generate counts matrices in scrublet friendly format
return(IN)
}
orgo_counts<-read_in_sparse("orgo.500") # make counts matrix from sparse matrix
#Read in fragment path for coverage plots
orgo_fragment.path="./orgo.fragments.tsv.gz"
# extract gene annotations from EnsDb
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
# change to UCSC style since the data was mapped to hg38
seqlevelsStyle(annotations) <- 'UCSC'
genome(annotations) <- "hg38"
#Generate ChromatinAssay Objects
orgo_chromatinassay <- CreateChromatinAssay(
counts = orgo_counts,
genome="hg38",
min.cells = 1,
annotation=annotations,
sep=c("_","_"),
fragments=orgo_fragment.path
)
#Create Seurat Object
orgo_atac <- CreateSeuratObject(
counts = orgo_chromatinassay,
assay = "peaks",
)
#Meta.data to be updated after clustering
#saving unprocessed SeuratObject
saveRDS(orgo_atac,file="orgo_SeuratObject.Rds")
Code from tutorial here.[https://github.com/AllonKleinLab/scrublet/blob/master/examples/scrublet_basics.ipynb]
#using a conda environment set up by ARSN
source /home/groups/oroaklab/nishida/scitools_env/bin/activate
#Installing scrublet
#pip install scrublet
import scrublet as scr
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy.sparse import coo_matrix
import gzip
import pandas as pd
#Load the raw counts matrix as a scipy sparse matrix with cells as rows and genes as columns.
input_dir = '/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis/'
#Perform on hg38 cells
counts_matrix = scipy.io.mmread(input_dir + 'orgo.500.counts.mtx').T.tocsc() #generated during the initialization of the Seurat Object
peaks= np.array(gzip.open(input_dir+'orgo.500.counts.sparseMatrix.rows.gz', 'rt').read().split()) #This is read in to check that our data frame is in the correct orientation
cellid= gzip.open(input_dir+'orgo.500.counts.sparseMatrix.cols.gz', 'rt').read().split() #This is read in to check that our data frame is in the correct orientation
print('Counts matrix shape: {} rows, {} columns'.format(counts_matrix.shape[0], counts_matrix.shape[1]))
print('Number of genes in gene list: {}'.format(len(peaks)))
#Run scrublet
scrub = scr.Scrublet(counts_matrix, expected_doublet_rate=0.05)
#Preprocessing...
#Simulating doublets...
#Embedding transcriptomes using PCA...
#Calculating doublet scores...
#Automatically set threshold at doublet score = 0.07
#Detected doublet rate = 31.1%
#Estimated detectable doublet fraction = 60.5%
#Overall doublet rate:
# Expected = 5.0%
# Estimated = 51.4%
#Elapsed time: 785.7 seconds
doublet_scores, predicted_doublets = scrub.scrub_doublets(min_counts=2,
min_cells=3,
min_gene_variability_pctl=85,
n_prin_comps=30)
df = pd.DataFrame({'cellid':cellid, 'doublet_scores':doublet_scores,'predicted_doublets':predicted_doublets})
df.to_csv('orgo.scrublet.tsv', index=False, sep="\t")
#renaming annot for simplified annotation file making
#rename processing_ processing. *annot
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
#Set up annotation summaries to contain the same information, in same column order
first_annot_append<-read.table("first_prep_summary_statistics_per_cell.tsv",header=T)
#reorder for consistent metadata
first_annot_append<-first_annot_append[c("cellID",
"tn5_plate",
"column","row",
"tn5_i5","tn5_i5_idx_name","tn5_i5_idx_seq",
"tn5_i7","tn5_i7_idx_name","tn5_i7_idx_seq",
"pcr_i7_idx_seq","pcr_i5_idx_seq",
"total_reads","uniq_reads","perc_uniq",
"prep","orgID","cell_line","differentiation_exp","DIV",
"freezing_protocol","sort_gate","treatment","organoid")]
second_annot_append<-read.table("second_prep_summary_statistics_per_cell.tsv",header=T)
second_annot_append$freezing_protocol<-"Flash_Frozen" #change this for the DIV90 cirm 43 diff exp 5 organoids
second_annot_append[(second_annot_append$DIV=="90" & second_annot_append$differentiation_exp=="5"),]$freezing_protocol<-"Slow_Freeze"
second_annot_append$sort_gate<-"NA"
second_annot_append$treatment<-"No"
second_annot_append$organoid<-second_annot_append$orgID
second_annot_append<-second_annot_append[c("cellID",
"tn5_plate",
"column","row",
"tn5_i5","tn5_i5_idx_name","tn5_i5_idx_seq",
"tn5_i7","tn5_i7_idx_name","tn5_i7_idx_seq",
"pcr_i7_idx_seq","pcr_i5_idx_seq",
"total_reads","uniq_reads","perc_uniq",
"prep","orgID","cell_line","differentiation_exp","DIV",
"freezing_protocol","sort_gate","treatment","organoid")]
annot_append<-rbind(first_annot_append,second_annot_append)
#orgID and prep need to be accounted for to get unique organoids (there are duplicates in orgID)
#Add original cluster information
original_cluster<-read.table("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/180703_sciATAC_Organoids/NatureLetterData/cellID_fullcharacterization.txt",header=T)
original_cluster$cellID<-paste0(original_cluster$cellID,"_1")
original_cluster<-original_cluster[c("cellID","Phenograph_Cluster")]
colnames(original_cluster)<-c("cellID","original_cluster")
orgo_atac<-readRDS(file="orgo_SeuratObject.Rds")
orgo_atac@meta.data$cellID<-row.names(orgo_atac@meta.data)
#Add scrublet info
orgo_scrub<-read.table("orgo.scrublet.tsv",header=T) #read in scrublet
#Add complexity info
compl_1<-read.table("source_fastq/preprocessing_files/orgo_prep1_1.complexity.txt",head=F)
colnames(compl_1)<-c("cellID","total_reads","unique_reads","percent_unique_reads")
compl_1$cellID<-paste0(compl_1$cellID,"_1")
compl_2<-read.table("source_fastq/preprocessing_files/orgo_prep2_1.complexity.txt",head=F)
colnames(compl_2)<-c("cellID","total_reads","unique_reads","percent_unique_reads")
compl_2$cellID<-paste0(compl_2$cellID,"_2")
compl_3<-read.table("source_fastq/preprocessing_files/orgo_prep2_2.complexity.txt",head=F)
colnames(compl_3)<-c("cellID","total_reads","unique_reads","percent_unique_reads")
compl_3$cellID<-paste0(compl_3$cellID,"_3")
compl<-rbind(compl_1,compl_2,compl_3)
#Add TSS enrichment value
tss_enrich<-read.table("orgo.ID.TSSenrich.value",header=F)
colnames(tss_enrich)<-c("cellID","tss_enrichment")
#merge all data frames
annot<-as.data.frame(orgo_atac@meta.data)
annot<-merge(annot,annot_append,by="cellID",all.x=T)
annot<-merge(annot,original_cluster,by="cellID",all.x=T)
annot<-merge(annot,orgo_scrub,by.x="cellID",by.y="cellid",all.x=T)
#annot<-merge(annot,compl,by="cellID",all.x=T)
annot<-merge(annot,tss_enrich,by="cellID",all.x=T)
row.names(annot)<-annot$cellID
orgo_atac@meta.data<-annot
#Add FRIP to meta data
frip<-read.table("orgo.500.fracOnTarget.values")
colnames(frip)<-c("cellID","frip")
row.names(frip)<-frip$cellID
frip<-frip[frip$cellID %in% row.names(orgo_atac@meta.data),]
frip_names<-setNames(frip$frip,nm=frip$cellID)
orgo_atac<-AddMetaData(object=orgo_atac,col.name="FRIP",metadata=frip_names)
#excluding differentiation experiment 4
orgo_atac<-subset(orgo_atac, differentiation_exp %in% c("5","7"))
orgo_cirm43<-subset(orgo_cirm43,DIV %in% c("15","30","60","90"))
orgo_atac<-subset(orgo_atac,cell_line=="CIRM43") #just cirm43 cell line and two differentiations
saveRDS(orgo_atac,file="orgo_cirm43.SeuratObject.Rds")
library(Signac)
library(Seurat)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
set.seed(1234)
#lowering cores to be used by chromvar to 10
library(BiocParallel)
register(MulticoreParam(10))
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
#Read in data and modify to monocle CDS file
#read in RDS file.
orgo_cirm43<-readRDS("orgo_cirm43.SeuratObject.Rds")
# Get a list of motif position frequency matrices from the JASPAR database
pfm <- getMatrixSet(x = JASPAR2020, opts = list(species =9606, all_versions = FALSE))
# Scan the DNA sequence of each peak for the presence of each motif, using orgo_atac for all objects (shared peaks)
motif.matrix <- CreateMotifMatrix(features = granges(orgo_cirm43), pwm = pfm, genome = 'hg38', use.counts = FALSE)
# Create a new Mofif object to store the results
motif <- CreateMotifObject(data = motif.matrix, pwm = pfm)
# Add the Motif object to the assays and run ChromVar
orgo_cirm43 <- SetAssayData(object = orgo_cirm43, assay = 'peaks', slot = 'motifs', new.data = motif)
orgo_cirm43 <- RegionStats(object = orgo_cirm43, genome = BSgenome.Hsapiens.UCSC.hg38)
orgo_cirm43 <- RunChromVAR( object = orgo_cirm43,genome = BSgenome.Hsapiens.UCSC.hg38)
saveRDS(orgo_cirm43,file="orgo_cirm43.SeuratObject.chromvar.Rds")
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
library(cisTopic)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS("orgo_cirm43.SeuratObject.chromvar.Rds")
cistopic_processing<-function(seurat_input,prefix){
cistopic_counts_frmt<-seurat_input$peaks@counts #grabbing counts matrices
row.names(cistopic_counts_frmt)<-sub("-", ":", row.names(cistopic_counts_frmt)) #renaming row names to fit granges expectation of format
atac_cistopic<-cisTopic::createcisTopicObject(cistopic_counts_frmt) #set up CisTopicObjects
#Run warp LDA on objects
atac_cistopic_models<-cisTopic::runWarpLDAModels(atac_cistopic,topic=c(20:30),nCores=11,addModels=FALSE)
#Setting up topic count selection
pdf(paste(prefix,"model_selection.pdf",sep="."))
par(mfrow=c(1,3))
cirm43_cistopic_models <- selectModel(atac_cistopic_models, type='derivative')
dev.off()
system(paste0("slack -F ",paste(prefix,"model_selection.pdf",sep=".")," ryan_todo"))
print("Saving cistopic models.")
saveRDS(atac_cistopic_models,file=paste(prefix,"CisTopicObject.Rds",sep="."))
}
cistopic_processing(seurat_input=orgo_cirm43,prefix="orgo_cirm43")
cirm43_cistopic_models<-readRDS("orgo_cirm43.CisTopicObject.Rds")
###############################################
#Loop through cistopic models
cistopic_loop<-function(topic_number,object_input,models_input){
models_input<-selectModel(models_input,select=topic_number)
#perform UMAP on topics
topic_df<-as.data.frame(models_input@selected.model$document_expects)
row.names(topic_df)<-paste0("Topic_",row.names(topic_df))
dims<-as.data.frame(uwot::umap(t(topic_df),n_components=2))
print("Performed UMAP.")
row.names(dims)<-colnames(topic_df)
colnames(dims)<-c("x","y")
dims$cellID<-row.names(dims)
dims<-merge(dims,object_input@meta.data,by.x="cellID",by.y="row.names")
#combine with seurat object
umap_dims<-as.data.frame(as.matrix(dims[2:3]))
colnames(umap_dims)<-c("UMAP_1","UMAP_2")
row.names(umap_dims)<-dims$cellID
cistopic_umap<-CreateDimReducObject(embeddings=as.matrix(umap_dims),assay="peaks",key="UMAP_")
object_input@reductions$umap<-cistopic_umap
#finally plot
plt<-DimPlot(object_input,group.by=c('DIV','cell_line'),size=0.1)+ggtitle(as.character(topic_number))
return(plt)
}
library(patchwork)
plt_list<-lapply(cirm43_cistopic_models@calc.params$runWarpLDAModels$topic,
FUN=cistopic_loop,
object_input=orgo_cirm43,
models_input=cirm43_cistopic_models)
plt_list<-wrap_plots(plt_list)
ggsave(plt_list,file="cirm43.umap_multipleTopicModels_clustering.png",height=20,width=60,limitsize=FALSE)
system("slack -F cirm43.umap_multipleTopicModels_clustering.png ryan_todo")
###############################################
#set topics based on derivative
cirm43_cisTopicObject<-cisTopic::selectModel(cirm43_cistopic_models,type="derivative",keepModels=T)
#saving model selected RDS
saveRDS(cirm43_cisTopicObject,file="orgo_cirm43.CisTopicObject.Rds")
####Function to include topics and umap in seurat object
cistopic_wrapper<-function(object_input=orgo_atac,cisTopicObject=orgo_cisTopicObject,resolution=0.8){
#run UMAP on topics
topic_df<-as.data.frame(cisTopicObject@selected.model$document_expects)
row.names(topic_df)<-paste0("Topic_",row.names(topic_df))
dims<-as.data.frame(uwot::umap(t(topic_df),n_components=2))
row.names(dims)<-colnames(topic_df)
colnames(dims)<-c("x","y")
dims$cellID<-row.names(dims)
dims<-merge(dims,object_input@meta.data,by.x="cellID",by.y="row.names")
#Add cell embeddings into seurat
cell_embeddings<-as.data.frame(cisTopicObject@selected.model$document_expects)
colnames(cell_embeddings)<-cisTopicObject@cell.names
n_topics<-nrow(cell_embeddings)
row.names(cell_embeddings)<-paste0("topic_",1:n_topics)
cell_embeddings<-as.data.frame(t(cell_embeddings))
#Add feature loadings into seurat
feature_loadings<-as.data.frame(cisTopicObject@selected.model$topics)
row.names(feature_loadings)<-paste0("topic_",1:n_topics)
feature_loadings<-as.data.frame(t(feature_loadings))
#combined cistopic results (cistopic loadings and umap with seurat object)
cistopic_obj<-CreateDimReducObject(embeddings=as.matrix(cell_embeddings),loadings=as.matrix(feature_loadings),assay="peaks",key="topic_")
umap_dims<-as.data.frame(as.matrix(dims[2:3]))
colnames(umap_dims)<-c("UMAP_1","UMAP_2")
row.names(umap_dims)<-dims$cellID
cistopic_umap<-CreateDimReducObject(embeddings=as.matrix(umap_dims),assay="peaks",key="UMAP_")
object_input@reductions$cistopic<-cistopic_obj
object_input@reductions$umap<-cistopic_umap
n_topics<-ncol(Embeddings(object_input,reduction="cistopic"))
object_input <- FindNeighbors(
object = object_input,
reduction = 'cistopic',
dims = 1:n_topics
)
object_input <- FindClusters(
object = object_input,
verbose = TRUE,
resolution=resolution
)
return(object_input)}
orgo_cirm43<-cistopic_wrapper(object_input=orgo_cirm43,cisTopicObject=cirm43_cisTopicObject,resolution=0.2)
plt<-DimPlot(orgo_cirm43,group.by=c("DIV","peaks_snn_res.0.2"))
ggsave(plt,file="test.umap.pdf")
system("slack -F test.umap.pdf ryan_todo")
saveRDS(orgo_cirm43,file="orgo_cirm43.SeuratObject.Rds") ###save Seurat file
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
library(dplyr)
library(patchwork)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS("orgo_cirm43.SeuratObject.Rds")
saveRDS(orgo_cirm43,"orgo_cirm43.preQC2.SeuratObject.Rds")
orgo_cirm43<-readRDS("orgo_cirm43.preQC2.SeuratObject.Rds")
#Cluster summaries
dat<-orgo_cirm43@meta.data
dat_sum<-as.data.frame(dat %>%
group_by(orgID,seurat_clusters,differentiation_exp,DIV) %>%
summarize(count=n()))
write.table(dat_sum,"cirm43_cluster_summary_statistics.tsv",col.names=T,row.names=T,quote=F,sep="\t")
plt<-ggplot(orgo_cirm43@meta.data,aes(x=orgID,fill=seurat_clusters))+geom_bar(position="fill")+facet_wrap(differentiation_exp~DIV,scales="free_x")+theme_minimal()
ggsave(plt,file="cirm43.qc.cellcount.perseuratcluster.pdf",width=10,height=5)
system(paste0("slack -F cirm43.qc.cellcount.perseuratcluster.pdf ryan_todo")) #looks good. note pitstop treatment was only done on div90 so some bias is to be expected
#Setting doublets as those in top 5% of doublet_score
threshold_number<-as.numeric(quantile(x=orgo_cirm43@meta.data$doublet_scores,prob=0.95))
orgo_cirm43$predicted_doublets<-"False"
orgo_cirm43@meta.data[as.numeric(orgo_cirm43@meta.data$doublet_scores)>=threshold_number,]$predicted_doublets<-"True"
#Testing organoids for tss enrichment
plt<-ggplot()+geom_histogram(aes(x=orgo_cirm43$tss_enrichment),bins=100)+theme_minimal()+geom_vline(xintercept=1.5,color="red")
ggsave(plt,file="cirm43.tssenrich.pdf")
system("slack -F cirm43.tssenrich.pdf ryan_todo")#post to ryan_todo
#filtering out cells with less than 1.5 for tss enrichment
#Testing for uneven distribution of cells by pitstop treatment
plt<-ggplot(dat[dat$DIV=="90" & dat$differentiation_exp=="5",],aes(fill=treatment,y=1,x=seurat_clusters))+geom_bar(position="fill",stat="identity")
ggsave(plt,file="pitstop_cluster_distribution.pdf",width=40,height=30,limitsize=F)
system(paste0("slack -F pitstop_cluster_distribution.pdf ryan_todo")) #looks good. note pitstop treatment was only done on div90 so some bias is to be expected
orgo_cirm43$uniq_orgID<-paste(orgo_cirm43$differentiation_exp,orgo_cirm43$DIV,orgo_cirm43$orgID,sep=" ")
orgo_cirm43$log10_uniq_reads<-log10(orgo_cirm43$uniq_reads)
orgo_cirm43$pass_qc<-"True"
orgo_cirm43@meta.data[(orgo_cirm43@meta.data$predicted_doublets=="True"),]$pass_qc<-"False" #predicted doublets
orgo_cirm43@meta.data[(orgo_cirm43@meta.data$tss_enrichment<1.5),]$pass_qc<-"False" #low tss enrichment
plt<-ggplot(orgo_cirm43@meta.data,aes(factor(orgID),fill=pass_qc))+geom_bar()+facet_wrap(differentiation_exp~DIV,scales="free_x")+theme_minimal()
ggsave(plt,file=paste0("cirm43.qc.cellcount.preqc.umap.pdf"),width=10,height=5)
system(paste0("slack -F ",paste0("cirm43.qc.cellcount.preqc.umap.pdf")," ryan_todo"))
table(orgo_cirm43@meta.data[(orgo_cirm43@meta.data$orgID==3 & orgo_cirm43@meta.data$differentiation_exp==5),]$pass_qc)
#False True
# 349 101
orgo_cirm43@meta.data[(orgo_cirm43@meta.data$orgID==3 & orgo_cirm43@meta.data$differentiation_exp==5),]$pass_qc<-"False" #high percentage of failed cells
table(orgo_cirm43$pass_qc)
#False True
# 3323 32267
for ( i in c('DIV','prep','uniq_orgID',"treatment",'differentiation_exp','seurat_clusters','predicted_doublets','pass_qc')){
plt<-DimPlot(orgo_cirm43,group.by=i,size=0.1)
ggsave(plt,file=paste0("cirm43.qc.",i,"preqc.umap.pdf"),width=5,height=5)
system(paste0("slack -F ",paste0("cirm43.qc.",i,"preqc.umap.pdf")," ryan_todo"))}
for (i in c("doublet_scores","tss_enrichment","log10_uniq_reads")){
plt<-FeaturePlot(orgo_cirm43,feat=i,col=c("white","black"),pt.size=0.1,order=T)
ggsave(plt,file=paste0("cirm43.qc.",i,"preqc.umap.pdf"),width=5,height=5)
system(paste0("slack -F ",paste0("cirm43.qc.",i,"preqc.umap.pdf")," ryan_todo"))}
orgo_cirm43_pit<-subset(orgo_cirm43,differentiation_exp=="5")
orgo_cirm43_pit<-subset(orgo_cirm43_pit,DIV=="90")
plt1<-DimPlot(orgo_cirm43_pit,group.by=c('treatment'),size=0.1)
ggsave(plt1,file="cirm43.exp5.div90.pitstop.pdf",limitsize=F,width=5,height=5)
system(paste0("slack -F cirm43.exp5.div90.pitstop.pdf ryan_todo"))#post to ryan_todo
library(dplyr)
data.frame(orgo_cirm43_pit@meta.data) %>% group_by(treatment) %>% summarize(mean=mean(uniq_reads))
# A tibble: 3 x 2
# treatment mean
# <chr> <dbl>
#1 No 17891.
#2 Pitstop2 39128.
#3 Pitstop2_Digi 46146
anova(x=as.numeric(orgo_cirm43_pit$uniq_reads),y=as.factor(orgo_cirm43_pit$treatment))
dat_sum<-as.data.frame(orgo_cirm43@meta.data %>%
group_by(orgID,differentiation_exp,DIV,pass_qc) %>%
summarize(count=n()))
orgo_qc<-subset(orgo_cirm43,pass_qc=="True")
saveRDS(orgo_qc,file="orgo_cirm43.QC2.SeuratObject.Rds") #save QC passing cells seurat object
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
library(dplyr)
library(patchwork)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
library(rGREAT)
library(parallel)
orgo_cirm43<-readRDS("orgo_cirm43.preQC2.SeuratObject.Rds")
#define DA functions for parallelization
#Use LR test for atac data
#using the top 3 populated clusters for orgID 3
organoid_3_differences<-function(i,obj.=orgo_cirm43,group,assay.="peaks"){
Idents(obj.)<-obj.$seurat_clusters
obj<-subset(obj.,seurat_clusters==i)
Idents(obj)<-obj$orgID
da_peaks_tmp <- FindMarkers(
object = obj,
ident.1 = "3", #orgID 3
group.by = group,
test.use = 'LR',
latent.vars = 'nCount_peaks',
only.pos=F,
assay=assay.,
logfc.threshold=0
)
da_peaks_tmp$da_region<-row.names(da_peaks_tmp)
closest_genes <- ClosestFeature(obj,da_peaks_tmp$da_region)
da_peaks_tmp<-cbind(da_peaks_tmp,closest_genes)
da_peaks_tmp$enriched_group<-c("3")
da_peaks_tmp$cluster<-c(i)
da_peaks_tmp$compared_group<-c("all_other_cells")
return(da_peaks_tmp)
}
out_4<-organoid_3_differences(obj.=orgo_cirm43,i=4,group="orgID")
write.table(out_4,file="org3_da_peaks_cluster4.tsv",sep="\t",quote=F,col.names=T,row.names=T)
out_5<-organoid_3_differences(obj.=orgo_cirm43,i=5,group="orgID")
write.table(out_5,file="org3_da_peaks_cluster5.tsv",sep="\t",quote=F,col.names=T,row.names=T)
out_7<-organoid_3_differences(obj.=orgo_cirm43,i=7,group="orgID")
write.table(out_7,file="org3_da_peaks_cluster7.tsv",sep="\t",quote=F,col.names=T,row.names=T)
out<-rbind(out_5,out_7,out_4)
#Significant genes:
#7
#EMB
#SHCBP1 #RG dividing
#CWH43
#4
#EMB
#SHCBP1
#5
#EMB
#CWH43
#ZNF33B #widely expressed
#Running GREAT to see if there is enrichment in the nominally significant differences
#format data as bed file all seurat objects have the same peak list
write("Preparing Background Set as all called peaks.", stderr())
orgo_bg_bed<-do.call("rbind",strsplit(unlist(orgo_cirm43@assays$peaks@counts@Dimnames[1]),"[-]"))
orgo_bg_bed<-as.data.frame(orgo_bg_bed)
colnames(orgo_bg_bed)<-c("chr","start","end")
orgo_bg_bed$start<-as.numeric(as.character(orgo_bg_bed$start))
orgo_bg_bed$end<-as.numeric(as.character(orgo_bg_bed$end))
orgo_bg_bed<-makeGRangesFromDataFrame(orgo_bg_bed)
dir.create("./GREAT_analysis.PreQC")
write("Beginning loop through all annotation groups.", stderr())
great_processing<-function(enriched_group_input,peak_dataframe,prefix,bg){
#subset bed file to peaks enriched in input group
orgo_bed<-as.data.frame(do.call("rbind",strsplit(peak_dataframe[peak_dataframe$cluster==enriched_group_input,]$da_region,"-")))
colnames(orgo_bed)<-c("chr","start","end")
orgo_bed$start<-as.numeric(as.character(orgo_bed$start))
orgo_bed$end<-as.numeric(as.character(orgo_bed$end))
nrow(orgo_bed)
orgo_bed<-orgo_bed[!duplicated(orgo_bed),]
row_count<-nrow(orgo_bed)
orgo_bed$width<-orgo_bed$end-orgo_bed$start
orgo_bed<-makeGRangesFromDataFrame(orgo_bed)
#run GREAT using all peaks as background
write(paste("Using",row_count, "DA peaks from",enriched_group_input), stderr())
job = submitGreatJob(orgo_bed,bg=bg,species="hg38",request_interval=30)
tb = getEnrichmentTables(job, ontology = c("GO Molecular Function", "GO Biological Process","GO Cellular Component"))
tb = getEnrichmentTables(job, category = c("GO","Phenotype","Genes"))
#Plot gene association
pdf(paste0("./GREAT_analysis.PreQC/",prefix,"_DApeaks_",enriched_group_input,".GeneAssociation.pdf"))
plotRegionGeneAssociationGraphs(job)
dev.off()
for (j in 1:length(names(tb))){
write(paste("Outputting DA GREAT Analysis for", enriched_group_input, as.character(names(tb))[j]), stderr())
tabl_name<-gsub(" ","",as.character(names(tb))[j])
write.table(as.data.frame(tb[[j]]),file=paste0("./GREAT_analysis.PreQC/",prefix,"_DApeaks_",enriched_group_input,".",tabl_name,".txt"),sep="\t",col.names=T,row.names=T,quote=F)
}
}
mclapply(unique(out$cluster), FUN=function(x){great_processing(enriched_group_input=x,peak_dataframe=out,prefix="orgID3",bg=orgo_bg_bed)},mc.cores=3)
#Using 27 DA peaks from 5
#Using 572 DA peaks from 7
#Using 39 DA peaks from 4
Now to rerun cistopic clustering with cells that pass QC metrics.
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
library(cisTopic)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds") #reading in QC passing cells
cistopic_processing<-function(seurat_input,prefix){
cistopic_counts_frmt<-seurat_input$peaks@counts #grabbing counts matrices
row.names(cistopic_counts_frmt)<-sub("-", ":", row.names(cistopic_counts_frmt)) #renaming row names to fit granges expectation of format
atac_cistopic<-cisTopic::createcisTopicObject(cistopic_counts_frmt) #set up CisTopicObjects
#Run warp LDA on objects
atac_cistopic_models<-cisTopic::runWarpLDAModels(atac_cistopic,topic=c(20:30),nCores=11,addModels=FALSE)
#Setting up topic count selection
pdf(paste(prefix,"model_selection.pdf",sep="."))
par(mfrow=c(1,3))
cirm43_cistopic_models <- selectModel(atac_cistopic_models, type='derivative')
dev.off()
system(paste0("slack -F ",paste(prefix,"model_selection.pdf",sep=".")," ryan_todo"))
print("Saving cistopic models.")
saveRDS(atac_cistopic_models,file=paste(prefix,"CisTopicObject.Rds",sep="."))
}
cistopic_processing(seurat_input=orgo_cirm43,prefix="orgo_cirm43.QC2")
cirm43_cistopic_models<-readRDS("orgo_cirm43.QC2.CisTopicObject.Rds")
###############################################
#Loop through cistopic models
cistopic_loop<-function(topic_number,object_input,models_input){
models_input<-selectModel(models_input,select=topic_number)
#perform UMAP on topics
topic_df<-as.data.frame(models_input@selected.model$document_expects)
row.names(topic_df)<-paste0("Topic_",row.names(topic_df))
dims<-as.data.frame(uwot::umap(t(topic_df),n_components=2))
print("Performed UMAP.")
row.names(dims)<-colnames(topic_df)
colnames(dims)<-c("x","y")
dims$cellID<-row.names(dims)
dims<-merge(dims,object_input@meta.data,by.x="cellID",by.y="row.names")
#combine with seurat object
umap_dims<-as.data.frame(as.matrix(dims[2:3]))
colnames(umap_dims)<-c("UMAP_1","UMAP_2")
row.names(umap_dims)<-dims$cellID
cistopic_umap<-CreateDimReducObject(embeddings=as.matrix(umap_dims),assay="peaks",key="UMAP_")
object_input@reductions$umap<-cistopic_umap
#finally plot
plt<-DimPlot(object_input,group.by=c('DIV','cell_line'),size=0.1)+ggtitle(as.character(topic_number))
return(plt)
}
library(patchwork)
plt_list<-lapply(cirm43_cistopic_models@calc.params$runWarpLDAModels$topic,
FUN=cistopic_loop,
object_input=orgo_cirm43,
models_input=cirm43_cistopic_models)
plt_list<-wrap_plots(plt_list)
ggsave(plt_list,file="cirm43.qc2.umap_multipleTopicModels_clustering.png",height=20,width=60,limitsize=FALSE)
system("slack -F cirm43.qc2.umap_multipleTopicModels_clustering.png ryan_todo")
###############################################
#set topics based on derivative
cirm43_cisTopicObject<-cisTopic::selectModel(cirm43_cistopic_models,type="derivative",keepModels=T)
#saving model selected RDS
saveRDS(cirm43_cisTopicObject,file="orgo_cirm43.QC2.CisTopicObject.Rds")
####Function to include topics and umap in seurat object
cistopic_wrapper<-function(object_input=orgo_atac,cisTopicObject=orgo_cisTopicObject,resolution=0.8){
#run UMAP on topics
topic_df<-as.data.frame(cisTopicObject@selected.model$document_expects)
row.names(topic_df)<-paste0("Topic_",row.names(topic_df))
dims<-as.data.frame(uwot::umap(t(topic_df),n_components=2))
row.names(dims)<-colnames(topic_df)
colnames(dims)<-c("x","y")
dims$cellID<-row.names(dims)
dims<-merge(dims,object_input@meta.data,by.x="cellID",by.y="row.names")
#Add cell embeddings into seurat
cell_embeddings<-as.data.frame(cisTopicObject@selected.model$document_expects)
colnames(cell_embeddings)<-cisTopicObject@cell.names
n_topics<-nrow(cell_embeddings)
row.names(cell_embeddings)<-paste0("topic_",1:n_topics)
cell_embeddings<-as.data.frame(t(cell_embeddings))
#Add feature loadings into seurat
feature_loadings<-as.data.frame(cisTopicObject@selected.model$topics)
row.names(feature_loadings)<-paste0("topic_",1:n_topics)
feature_loadings<-as.data.frame(t(feature_loadings))
#combined cistopic results (cistopic loadings and umap with seurat object)
cistopic_obj<-CreateDimReducObject(embeddings=as.matrix(cell_embeddings),loadings=as.matrix(feature_loadings),assay="peaks",key="topic_")
umap_dims<-as.data.frame(as.matrix(dims[2:3]))
colnames(umap_dims)<-c("UMAP_1","UMAP_2")
row.names(umap_dims)<-dims$cellID
cistopic_umap<-CreateDimReducObject(embeddings=as.matrix(umap_dims),assay="peaks",key="UMAP_")
object_input@reductions$cistopic<-cistopic_obj
object_input@reductions$umap<-cistopic_umap
n_topics<-ncol(Embeddings(object_input,reduction="cistopic"))
object_input <- FindNeighbors(
object = object_input,
reduction = 'cistopic',
dims = 1:n_topics
)
object_input <- FindClusters(
object = object_input,
verbose = TRUE,
resolution=resolution
)
return(object_input)}
DefaultAssay(orgo_cirm43)<-"peaks"
orgo_cirm43<-cistopic_wrapper(object_input=orgo_cirm43,cisTopicObject=cirm43_cisTopicObject,resolution=0.2)
for (i in c("DIV","seurat_clusters")){
plt<-DimPlot(orgo_cirm43,group.by=c(i))
ggsave(plt,file=paste0("cirm43.",i,"postqc.umap.pdf"),width=5,height=5)
system(paste0("slack -F ",paste0("cirm43.",i,"postqc.umap.pdf")," ryan_todo"))}
orgo_cirm43$postqc_clusters<-orgo_cirm43$seurat_clusters
saveRDS(orgo_cirm43,file="orgo_cirm43.QC2.SeuratObject.Rds") ###save Seurat file
library(Signac)
library(Seurat)
library(SeuratWrappers)
library(ggplot2)
library(patchwork)
library(monocle3,lib.loc="/home/groups/oroaklab/src/R/R-4.0.0/library/") #using old install of monocle, just need for as.cell_data_set conversion
library(cicero,lib.loc="/home/groups/oroaklab/src/R/R-4.0.0/library/") #and using old version of cicero
library(EnsDb.Hsapiens.v86)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
#Cicero processing function
cicero_processing<-function(object_input=orgo_atac,prefix="orgo_atac"){
#Generate CDS format from Seurat object
atac.cds <- as.cell_data_set(object_input,assay="peaks",reduction="umap")
# convert to CellDataSet format and make the cicero object
print("Making Cicero format CDS file")
atac.cicero <- make_cicero_cds(atac.cds, reduced_coordinates = reducedDims(atac.cds)$UMAP)
saveRDS(atac.cicero,paste(prefix,"atac_cicero_cds.Rds",sep="_"))
genome <- seqlengths(object_input) # get the chromosome sizes from the Seurat object
genome.df <- data.frame("chr" = names(genome), "length" = genome) # convert chromosome sizes to a dataframe
print("Running Cicero to generate connections.")
conns <- run_cicero(atac.cicero, genomic_coords = genome.df) # run cicero
saveRDS(conns,paste(prefix,"atac_cicero_conns.Rds",sep="_"))
print("Generating CCANs")
ccans <- generate_ccans(conns) # generate ccans
saveRDS(ccans,paste(prefix,"atac_cicero_ccans.Rds",sep="_"))
print("Adding CCAN links into Seurat Object and Returning.")
links <- ConnectionsToLinks(conns = conns, ccans = ccans) #Add connections back to Seurat object as links
Links(object_input) <- links
return(object_input)
}
orgo_cirm43<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds")
orgo_cirm43<-cicero_processing(object_input=orgo_cirm43,prefix="orgo_cirm43.QC2")
saveRDS(orgo_cirm43,"orgo_cirm43.SeuratObject.unnormGA.Rds")
orgo_cirm43<-readRDS("orgo_cirm43.SeuratObject.unnormGA.Rds")
# generate unnormalized gene activity matrix
# gene annotation sample
hg38_annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
pos <-as.data.frame(hg38_annotations,row.names=NULL)
pos$chromosome<-paste0("chr",pos$seqnames)
pos$gene<-pos$gene_id
pos <- subset(pos, strand == "+")
pos <- pos[order(pos$start),]
pos <- pos[!duplicated(pos$tx_id),] # remove all but the first exons per transcript
pos$end <- pos$start + 1 # make a 1 base pair marker of the TSS
neg <-as.data.frame(hg38_annotations,row.names=NULL)
neg$chromosome<-paste0("chr",neg$seqnames)
neg$gene<-neg$gene_id
neg <- subset(neg, strand == "-")
neg <- neg[order(neg$start,decreasing=TRUE),]
neg <- neg[!duplicated(neg$tx_id),] # remove all but the first exons per transcript
neg$end <- neg$end + 1 # make a 1 base pair marker of the TSS
gene_annotation<- rbind(pos, neg)
gene_annotation <- gene_annotation[,c("chromosome","start","end","gene_name")] # Make a subset of the TSS annotation columns containing just the coordinates and the gene name
names(gene_annotation)[4] <- "gene" # Rename the gene symbol column to "gene"
geneactivity_processing<-function(cds_input,conns_input,prefix){
atac.cds<- annotate_cds_by_site(cds_input, gene_annotation)
unnorm_ga <- build_gene_activity_matrix(atac.cds, conns_input)
saveRDS(unnorm_ga,paste(prefix,"unnorm_GA.Rds",sep="."))
}
conns<-as.data.frame(readRDS("orgo_cirm43_atac_cicero_conns.Rds"))
orgo_cirm43.cicero<-readRDS("orgo_cirm43_atac_cicero_cds.Rds")
geneactivity_processing(cds_input=as.cell_data_set(orgo_cirm43,group_by="seurat_clusters"),conns_input=conns,prefix="cirm43_atac")
#Read in unnormalized GA
cicero_gene_activities<-readRDS("cirm43_atac.unnorm_GA.Rds")
orgo_cirm43[['GeneActivity']]<- CreateAssayObject(counts = cicero_gene_activities)
# normalize
orgo_cirm43 <- NormalizeData(
object = orgo_cirm43,
assay = 'GeneActivity',
normalization.method = 'LogNormalize',
scale.factor = median(orgo_cirm43$nCount_GeneActivity)
)
saveRDS(orgo_cirm43,"orgo_cirm43.QC2.SeuratObject.Rds")
Cell Type Assignment of Organoid Clusters Doing this in three parts.
Downloading Ziffra Et al. from UCSC cell browser
#download peaks
mkdir /home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/ziffra
cd /home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/ziffra
wget https://cells.ucsc.edu/cortex-atac/peaks/matrix.mtx.gz
wget https://cells.ucsc.edu/cortex-atac/peaks/features.tsv.gz
wget https://cells.ucsc.edu/cortex-atac/peaks/barcodes.tsv.gz
wget https://cells.ucsc.edu/cortex-atac/peaks/meta.tsv
library(Seurat)
library(data.table)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/ziffra")
#Cell name issues or something, just making a GA object for now
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/ziffra")
metadata<-read.csv("meta.tsv",sep="\t") #metadata
row.names(metadata)<-metadata$uniqueID
#Add Gene Activity data
mat<-fread("https://cells.ucsc.edu/cortex-atac/genes/exprMatrix.tsv.gz") #download gene activity
genes = mat[,1][[1]]
genes = gsub(".+[|]", "", genes)
genes_list<-which(!duplicated(genes)) #remove duplicate gene names
mat = data.frame(mat[genes_list,-1], row.names=genes[genes_list])
colnames(mat) = gsub("[.]", "-", colnames(mat))
ziffra <- CreateSeuratObject(
counts = mat,
assay = "Ziffra_GeneActivity"
)
ziffra<-AddMetaData(ziffra,metadata=metadata)
saveRDS(ziffra,"ziffra.SeuratObject.Rds")
#https://satijalab.org/seurat/v3.1/atacseq_integration_vignette.html://satijalab.org/seurat/v3.1/atacseq_integration_vignette.html
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(EnsDb.Hsapiens.v86)
library(ggplot2)
set.seed(1234)
library(reshape2)
library(dplyr)
library(Matrix)
library(cicero)
library(SeuratWrappers)
library(ComplexHeatmap)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
library(circlize)
library(viridis)
# Load the pre-processed scRNA-seq and scATAC-seq data
#Public RNA
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data")
pubprimary<-readRDS("PublicPrimary.SeuratObject.rds")
#perform random subsampling, since cross data integration is robust to cell count and its taking forever
#using 1/10th the cells (~10k)
pubprimary <- subset(pubprimary, cells = sample(x = colnames(pubprimary@assays$RNA@data), size = length(colnames(pubprimary@assays$RNA@data))/10) )
pubprimary<-SetIdent(pubprimary,value="Type")
#subset to cell types expected to occur in organoids
pubprimary<-subset(pubprimary,idents=c("Excitatory Neuron","IPC","Radial Glia","Inhibitory Neuron"))
#Our ATAC
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS(file="orgo_cirm43.QC2.SeuratObject.Rds")
#1. Using bulk sorted RG, IPC, eN and iN RNA markers compared to our ATAC cluster gene activity scores
#Corticogenic data on basic cell types.
#data from http://data.nemoarchive.org/5923ca16c51011e99da31f7757ebac1c/
#described in https://www.nature.com/articles/s41586-020-2825-4?WT.ec_id=NATURE-202010&sap-outbound-id=60313C942AEB24BFE1AF8DD74FB2E05B7385E720#data-availability
#Bulk ATAC Peaks for marker sorted cell types located in /home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/Song_2020
#For overlapping these narrowPeak features do:
#Added marker TFs in as well
markers<-c("CTCF","EMX1","EMX2","LHX2","PAX6","RFX4","SOX2","SOX4",
"TCF7","GSX1","NFIX","EOMES","TBR1","NEUROD1","NEUROD2","NEUROG1","TGIF1","TGIF2","BHLHE23","MEF2A",
"DLX1","DLX2","DLX6","GSX2","LHX6",
"POU3F3","POU3F2","TFAP4")
#Plot motifs alongside chromvar plot
library(ggplot2)
library(patchwork)
motif_order<-names(orgo_cirm43@assays$peaks@motifs@motif.names[match(markers,unlist(orgo_cirm43@assays$peaks@motifs@motif.names),nomatch=0)])
DefaultAssay(orgo_cirm43)<-"peaks"
plt<-MotifPlot(object = orgo_cirm43,motifs = motif_order,ncol=1)+theme_void()+theme(strip.text = element_blank())
ggsave(plt,file="tf.heatmap.motif.pdf",height=10,width=2,limitsize=F)
system("slack -F tf.heatmap.motif.pdf ryan_todo")
#Setting up chromvar matrix from CIRM43
tfList <- getMatrixByID(JASPAR2020, ID=row.names(orgo_cirm43@assays$chromvar@data))
tfList <-unlist(lapply(names(tfList), function(x) name(tfList[[x]])))
motif_plots<-MotifPlot(
object = orgo_cirm43,
motifs = row.names(orgo_cirm43@assays$chromvar@data)[match(markers,tfList,nomatch=0)],
assay = 'peaks'
)
ggsave(motif_plots,file="marker_motifs.pdf")
system("slack -F marker_motifs.pdf ryan_todo")
dat_tf<-orgo_cirm43@assays$chromvar@data
row.names(dat_tf)<-tfList
dat_tf<-data.frame(t(dat_tf))
dat_tf$cellID<-row.names(dat_tf)
dat_tf$cluster_ID<-orgo_cirm43@meta.data[match(orgo_cirm43@meta.data$cellID,dat_tf$cellID),]$seurat_clusters #append cluster ID to column
dat_tf<-dat_tf[colnames(dat_tf) %in% c("cluster_ID",markers)] #subset to markers
dat_tf<-melt(dat_tf) #reshape to long format
dat_tf<-as.data.frame(dat_tf %>% group_by(cluster_ID,variable) %>% summarize(mean_chromvar=median(value))) #group by to summarize markers by column
#plot as heatmap
dat_tf<-dcast(dat_tf,cluster_ID~variable)
row.names(dat_tf)<-dat_tf$cluster_ID
dat_tf<-dat_tf[colnames(dat_tf) %in% markers]
dat_tf[which(is.na(dat_tf),arr.ind=T)]<-0 #set na values to 0 for clustering
dat_tf<-as.data.frame(t(dat_tf))
clus_order<-c("6","5","0","4","1","3","2")
dat_tf<-dat_tf[clus_order]
colfun=colorRamp2(quantile(unlist(dat_tf), probs=seq(0.1,0.9,0.1)),cividis(length(seq(0.1,0.9,0.1))))
plt<-Heatmap(dat_tf,
row_order=match(markers,row.names(dat_tf))[!is.na(match(markers,row.names(dat_tf)))],
column_order=clus_order,
col=colfun)
#column_split=column_split)
pdf("cirm43_celltype_tfHeatmap.pdf")
plt
dev.off()
system("slack -F cirm43_celltype_tfHeatmap.pdf ryan_todo")
#2. Using bulk sorted RG, IPC, eN and iN ATAC motifs compared to our ATAC cluster motifs
#Setting up gene activity matrix
markers<-c("SOX2","PAX6","HES1","HOPX","VIM","GFAP","TNC","GPX3",
"NEUROG1","SSTR2","EOMES","PPP1R17","NEUROD4",
"SLC17A7","NEUROD6","SATB2","TBR1","SLA",
"DLX2","DLX1","LHX6","GAD1","SOX4","TCF7","GSX1","NFIX","BHLHE23","MEF2A")
dat_ga<-orgo_cirm43@assays$GeneActivity@data
dat_ga<-data.frame(t(dat_ga))
dat_ga$cellID<-row.names(dat_ga)
#append cluster ID to column
dat_ga$cluster_ID<-orgo_cirm43@meta.data[match(orgo_cirm43@meta.data$cellID,dat_ga$cellID),]$seurat_clusters
#subset to markers
dat_ga<-dat_ga[colnames(dat_ga) %in% c("cluster_ID",markers)]
#reshape to long format
dat_ga<-melt(dat_ga)
#group by to summarize markers by column
dat_ga<-as.data.frame(dat_ga %>% group_by(cluster_ID,variable) %>% summarize(mean_ga=mean(value)))
#plot as heatmap
dat_ga<-dcast(dat_ga,cluster_ID~variable)
row.names(dat_ga)<-dat_ga$cluster_ID
dat_ga<-dat_ga[colnames(dat_ga) %in% markers]
#zscore values
dat_ga<-scale(dat_ga)
#set na values to 0 for clustering
dat_ga<-data.frame(t(dat_ga))
clus_order<-c("6","5","0","4","1","3","2")
clus_order<-paste0("X",clus_order)
#column_split<-factor(unlist(lapply(strsplit(colnames(dat_ga),"_"),"[",1)),levels=c("X3","X1","X2","X0"))
dat_ga<-dat_ga[colnames(dat_ga) %in% clus_order]
colfun=colorRamp2(quantile(unlist(dat_ga), probs=seq(0.1,0.9,0.1)),magma(length(seq(0.1,0.9,0.1))))
plt<-Heatmap(dat_ga,
row_order=match(markers,row.names(dat_ga))[!is.na(match(markers,row.names(dat_ga)))],
column_order=clus_order,
col=colfun)
#column_split=column_split)
pdf("cirm43_celltype_gaHeatmap.pdf")
plt
dev.off()
system("slack -F cirm43_celltype_gaHeatmap.pdf ryan_todo")
#3. Using single-cell Primary Cortex RG, IPC, eN and iN annotated cells to define signatures and perform CCA for label transfer
markers<-read.table("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/pubprimary.markers.txt",header=T,sep="\t")
transfer.anchors <- FindTransferAnchors(
reference = pubprimary,
reference.assay="RNA",
query = orgo_cirm43,
query.assay="GeneActivity",
reduction = 'cca',
features=markers$gene,
verbose=T
)
saveRDS(transfer.anchors,"orgo_cirm43.PublicPrimary.transferanchors.rds")
orgo_cirm43[["celltype_prediction"]]<- TransferData(
anchorset = transfer.anchors,
refdata = pubprimary$Type,
weight.reduction = "cca",
dims = 1:30,
prediction.assay=T
)
saveRDS(orgo_cirm43,file="orgo_cirm43.QC2.SeuratObject.Rds")
plt1<-FeaturePlot(orgo_cirm43,features=c('Radial Glia'),pt.size=0.1,min.cutoff="q75",order=T,col=c("white","#EFA421"))
plt2<-FeaturePlot(orgo_cirm43,features=c('Excitatory Neuron'),pt.size=0.1,min.cutoff="q75",order=T,col=c("white","#6B78BA"))
plt3<-FeaturePlot(orgo_cirm43,features=c('IPC'),pt.size=0.1,min.cutoff="q75",sort=T)
plt<-plt1/plt2/plt3
ggsave(plt,file="cirm43.predictedid.umap.png",width=10,height=30,limitsize=F)
ggsave(plt,file="cirm43.predictedid.umap.pdf",width=10,height=30,limitsize=F)
system("slack -F cirm43.predictedid.umap.pdf ryan_todo")
predictdat<-as.data.frame(t(orgo_cirm43@assays$celltype_prediction@data))[,1:4]
predictdat$seurat_clusters<-orgo_cirm43@meta.data[row.names(predictdat),]$seurat_clusters
predictdat<-melt(predictdat)
predictdat<-as.data.frame(predictdat %>% group_by(seurat_clusters,variable) %>% summarize(average=mean(value)))
predictdat<-dcast(predictdat,seurat_clusters~variable)
row.names(predictdat)<-predictdat$seurat_clusters
predictdat<-predictdat[!(colnames(predictdat) %in% c("seurat_clusters"))]
predictdat<-as.data.frame(t(scale(predictdat,scale=T)))
clus_order<-c("6","5","0","4","1","3","2")
colfun=colorRamp2(c(-2,0,2),c("#000000","#FFFFFF","#FF0000"))
predictdat<-predictdat[colnames(predictdat) %in% clus_order]
plt<-Heatmap(predictdat,
row_order=c("Radial Glia","IPC","Excitatory Neuron","Inhibitory Neuron"),
column_order=clus_order,
col=colfun)
pdf("predictedid.heatmap.pdf")
plt
dev.off()
system("slack -F predictedid.heatmap.pdf ryan_todo")
saveRDS(orgo_cirm43,file="orgo_cirm43.QC2.SeuratObject.Rds")
library(Signac)
library(Seurat)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
set.seed(1234)
library(motifmatchr)
library(parallel)
library(ggplot2)
library(motifmatchr)
library(chromVAR)
library(universalmotif)
library(EnsDb.Hsapiens.v86)
library(cicero,lib.loc="/home/groups/oroaklab/nishida/R_4.0.0_arsn")
library(monocle3,lib.loc="/home/groups/oroaklab/nishida/R_4.0.0_arsn")
library(SeuratWrappers)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS(file="orgo_cirm43.QC2.SeuratObject.Rds")
#Run cicero for binned cells
#Cicero processing function
cicero_processing<-function(object_input,prefix){
#if(dim(object_input)[2]<15000){
atac.cds <- as.CellDataSet(object_input,assay="peaks") #Generate CDS format from Seurat object
atac.cicero <- make_cicero_cds(atac.cds, reduced_coordinates = atac.cds@reducedDimS)
#}else{
#atac.cds<-make_atac_cds(mefa4::Melt(object_input@assays$peaks@counts[rowSums(object_input@assays$peaks@counts)>0,]),) #For larger files use raw data, this is only because my version of monocle and cicero are a bit outdated
#atac.cicero <- make_cicero_cds(atac.cds, reduced_coordinates = object_input@reductions$umap@cell.embeddings)
#}
saveRDS(atac.cicero,paste(prefix,"atac_cicero_cds.Rds",sep="_"))
# extract gene annotations from EnsDb
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevels(annotations)<-paste0("chr",seqlevels(annotations))
# change to UCSC style since the data was mapped to hg38
#seqlevelsStyle(annotations) <- 'UCSC'
genome(annotations) <- "hg38"
genome <- annotations@seqinfo # get the chromosome sizes from the Seurat object
genome.df <- data.frame("chr" = genome@seqnames, "length" = genome@seqlengths) # convert chromosome sizes to a dataframe
print("Running Cicero to generate connections.")
conns <- run_cicero(atac.cicero, genomic_coords = genome.df) # run cicero
saveRDS(conns,paste(prefix,"atac_cicero_conns.Rds",sep="_"))
print("Generating CCANs")
ccans <- generate_ccans(conns) # generate ccans
saveRDS(ccans,paste(prefix,"atac_cicero_ccans.Rds",sep="_"))
print("Adding CCAN links into Seurat Object and Returning.")
links <- ConnectionsToLinks(conns = conns, ccans = ccans) #Add connections back to Seurat object as links
DefaultAssay(object_input)<-"peaks"
Links(object_input) <- links
return(object_input)
}
#run cicero linkage per cluster
for (i in unique(orgo_cirm43$seurat_clusters)){
dat<-subset(orgo_cirm43,seurat_clusters==i)
dat<-cicero_processing(object_input=dat,prefix=paste0("orgo_cirm43.QC2.",i))
saveRDS(dat,paste0("orgo_cirm43.QC2.",i,".Rds"))
}
#run per DIV?
orgo_div30<-subset(orgo_cirm43,DIV=="30")
orgo_div30<-cicero_processing(object_input=orgo_div30,prefix="orgo_cirm43.QC2.DIV30")
saveRDS(orgo_div30,"orgo_cirm43.QC2.DIV30.Rds")
#orgo_div30<-readRDS("orgo_cirm43.QC2.DIV30.Rds")
orgo_div60<-subset(orgo_cirm43,DIV=="60")
orgo_div60<-cicero_processing(object_input=orgo_div60,prefix="orgo_cirm43.QC2.DIV60")
saveRDS(orgo_div60,"orgo_cirm43.QC2.DIV60.Rds")
#orgo_div60<-readRDS("orgo_cirm43.QC2.DIV60.Rds")
orgo_div90<-subset(orgo_cirm43,DIV=="90")
orgo_div90<-cicero_processing(object_input=orgo_div90,prefix="orgo_cirm43.QC2.DIV90")
saveRDS(orgo_div90,"orgo_cirm43.QC2.DIV90.Rds")
#orgo_div90<-readRDS("orgo_cirm43.QC2.DIV90.Rds")
Plot genome tracks
library(Signac)
library(Seurat)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
set.seed(1234)
library(motifmatchr)
library(parallel)
library(ggplot2)
library(motifmatchr)
library(chromVAR)
library(universalmotif)
library(EnsDb.Hsapiens.v86)
library(cicero,lib.loc="/home/groups/oroaklab/nishida/R_4.0.0_arsn")
library(monocle3,lib.loc="/home/groups/oroaklab/nishida/R_4.0.0_arsn")
library(SeuratWrappers)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS(file="orgo_cirm43.QC2.SeuratObject.Rds")
Idents(orgo_cirm43)<-orgo_cirm43$seurat_clusters
# extract position frequency matrices for the motifs
pwm <- getMatrixSet(
x = JASPAR2020,
opts = list(species = 9606, all_versions = FALSE)
)
# add motif information
DefaultAssay(orgo_cirm43)<-"peaks"
orgo_cirm43 <- AddMotifs(orgo_cirm43, genome = BSgenome.Hsapiens.UCSC.hg38, pfm = pwm)
saveRDS(orgo_cirm43,file="orgo_cirm43.QC2.SeuratObject.Rds")
# gather the footprinting information for sets of motifs
orgo_cirm43 <- Footprint(
object = orgo_cirm43,
motif.name = c("SOX2","TBR1","EOMES"),
genome = BSgenome.Hsapiens.UCSC.hg38
)
# plot the footprint data for each group of cells
p2 <- PlotFootprint(orgo_cirm43, features = c("SOX2","TBR1","EOMES"))
p2 + patchwork::plot_layout(ncol = 1)
ggsave(p2,file=paste0("orgo_cirm43.TF_footprints.pdf"),height=40,width=20,limitsize=F)
system(paste0("slack -F ","orgo_cirm43.TF_footprints.pdf"," ryan_todo"))
#use link plot to generate cicero links
#Generate Coverage Plots Across Genes
#####Using a custom Plotting function###############
#SOX2 chr3-181711925-181714436 MA0143.4
#TBR1 chr2-161416297-161425870 MA0802.1
#EOMES chr3-27715949-27722713 MA0800.1
#HOPX chr4-56647988-56681899
#BCL11B chr14-99169287-99272197 MA1989.1
#####################Old style of plotting######################
#plot all panels for each subset (cluster in this case)
dat<-orgo_cirm43
dat$seurat_clusters<-factor(dat$seurat_clusters,levels=c(6,5,0,4,1,3,2))
#PAX6
PAX6_plot<-CoveragePlot(object = dat, region="PAX6", assay="peaks", ident=dat$seurat_clusters, extend.upstream=2000,extend.downstream=2000,features="PAX6",expression.assay="GeneActivity")
ggsave(PAX6_plot,file=paste0("orgo_cirm43.PAX6.featureplots.pdf"),height=40,width=20,limitsize=F)
system(paste0("slack -F ","orgo_cirm43.PAX6.featureplots.pdf"," ryan_todo"))
#SOX2
sox2_plot<-CoveragePlot(object = dat, region="SOX2", assay="peaks", ident=dat$seurat_clusters, extend.upstream=2000,extend.downstream=2000,features="SOX2",expression.assay="GeneActivity")
ggsave(sox2_plot,file=paste0("orgo_cirm43.SOX2.featureplots.pdf"),height=40,width=20,limitsize=F)
system(paste0("slack -F ","orgo_cirm43.SOX2.featureplots.pdf"," ryan_todo"))
#HOPX
HOPX_plot<-CoveragePlot(object = dat, region="HOPX", assay="peaks", ident=dat$seurat_clusters, extend.upstream=2000,extend.downstream=2000,features="HOPX",expression.assay="GeneActivity")
ggsave(HOPX_plot,file=paste0("orgo_cirm43.HOPX.featureplots.pdf"),height=40,width=20,limitsize=F)
system(paste0("slack -F ","orgo_cirm43.HOPX.featureplots.pdf"," ryan_todo"))
#EOMES
EOMES_plot<-CoveragePlot(object = dat, region="EOMES", assay="peaks", ident=dat$seurat_clusters, extend.upstream=2000,extend.downstream=2000,features="EOMES",expression.assay="GeneActivity")
ggsave(EOMES_plot,file=paste0("orgo_cirm43.EOMES.featureplots.pdf"),height=40,width=20,limitsize=F)
system(paste0("slack -F ","orgo_cirm43.EOMES.featureplots.pdf"," ryan_todo"))
#TBR1
TBR1_plot<-CoveragePlot(object = dat, region="TBR1", assay="peaks", ident=dat$seurat_clusters, extend.upstream=2000,extend.downstream=2000,features="TBR1",expression.assay="GeneActivity")
ggsave(TBR1_plot,file=paste0("orgo_cirm43.TBR1.featureplots.pdf"),height=40,width=20,limitsize=F)
system(paste0("slack -F ","orgo_cirm43.TBR1.featureplots.pdf"," ryan_todo"))
#NEUROD1
NEUROD1_plot<-CoveragePlot(object = dat, region="NEUROD1", assay="peaks", ident=dat$seurat_clusters, extend.upstream=2000,extend.downstream=2000,features="NEUROD1",expression.assay="GeneActivity")
ggsave(NEUROD1_plot,file=paste0("orgo_cirm43.NEUROD1.featureplots.pdf"),height=40,width=20,limitsize=F)
system(paste0("slack -F ","orgo_cirm43.NEUROD1.featureplots.pdf"," ryan_todo"))
#BCL11B
BCL11B_plot<-CoveragePlot(object = dat, region="BCL11B", assay="peaks", ident=dat$seurat_clusters, extend.upstream=2000,extend.downstream=2000,features="BCL11B",expression.assay="GeneActivity")
ggsave(BCL11B_plot,file=paste0("orgo_cirm43.BCL11B.featureplots.pdf"),height=40,width=20,limitsize=F)
system(paste0("slack -F ","orgo_cirm43.BCL11B.featureplots.pdf"," ryan_todo"))
#SATB2
SATB2_plot<-CoveragePlot(object = dat, region="SATB2", assay="peaks", ident=dat$seurat_clusters, extend.upstream=2000,extend.downstream=2000,features="SATB2",expression.assay="GeneActivity")
ggsave(SATB2_plot,file=paste0("orgo_cirm43.SATB2.featureplots.pdf"),height=40,width=20,limitsize=F)
system(paste0("slack -F ","orgo_cirm43.SATB2.featureplots.pdf"," ryan_todo"))
#CUX1
CUX1_plot<-CoveragePlot(object = dat, region="CUX1", assay="peaks", ident=dat$seurat_clusters, extend.upstream=2000,extend.downstream=2000,features="CUX1",expression.assay="GeneActivity")
ggsave(CUX1_plot,file=paste0("orgo_cirm43.CUX1.featureplots.pdf"),height=40,width=20,limitsize=F)
system(paste0("slack -F ","orgo_cirm43.CUX1.featureplots.pdf"," ryan_todo"))
Seurat has a stored set of cell cycle genes that we can use to assess cell cycle signatures.
Following this. Using gene lists based on cell cycle markers listed in https://www.cell.com/neuron/pdf/S0896-6273(19)30561-6.pdf Supplementary Table 7.
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
library(Seurat)
library(Signac)
library(RColorBrewer)
orgo_cirm43<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds")
s.genes <- c('AK2', 'SLC25A5', 'TMEM98', 'GGCT', 'DBF4', 'PAX6', 'SPAG9', 'RPS20', 'BAZ1B', 'UQCRC1', 'ANLN', 'BRCA1', 'DDX11', 'TACC3', 'VIM', 'HMGB3', 'CENPQ', 'RFC1', 'MAT2B', 'SPDL1', 'CNTLN', 'TPR', 'RCN1', 'RFC2', 'RAD51', 'POLQ', 'MPHOSPH9', 'NNAT', 'SYNE2', 'COL11A1', 'QSER1', 'SNCAIP', 'MCM10', 'YBX1', 'ASPM', 'MPPED2', 'PKM', 'RHOA', 'PRR11', 'NUCKS1', 'RAD18', 'SMC1A', 'HMMR', 'MCM2', 'CA12', 'PTPLAD1', 'ENO1', 'GTSE1', 'ACTB', 'MCM6', 'SPAG5', 'UBE2T', 'POLD3', 'JADE1', 'FBLN1', 'SLC1A3', 'XRCC5', 'KIF22', 'RBL1', 'NDC80', 'HSPB11', 'XPO1', 'GSTP1', 'SRRT', 'SF3B2', 'TFAP2C', 'TPX2', 'RPLP0', 'FUS', 'KIF4A', 'ORC6', 'ZFHX4', 'HNRNPC', 'SUPT16H', 'WDR76', 'PHGDH', 'EZR', 'MYL6', 'CLSPN', 'CDC45', 'CDC6', 'CBX5', 'MSH2', 'CDC5L', 'HNRNPH3', 'H2AFY2', 'HNRNPM', 'RANBP1', 'SNRPD3', 'CENPM', 'HMGXB4', 'MCM5', 'RPL3', 'FKBP3', 'CEP128', 'ERH', 'VRK1', 'PNN', 'GINS1', 'PLCB4', 'NOP56', 'SMCHD1', 'RBBP8', 'POLA1', 'STAG2', 'RBBP7', 'CMC2', 'UBE2I', 'CCP110', 'CEP152', 'SFRP1', 'EEF1D', 'MCM4', 'RNASEH2A', 'HNRNPUL1', 'LIG1', 'CDK6', 'PTN', 'H2AFV', 'CHCHD2', 'HSPB1', 'NUDT1', 'PPP1R17', 'LSM5', 'RPA3', 'EZH2', 'RHEB', 'LHX2', 'ELAVL2', 'NSMCE4A', 'SMC3', 'KPNB1', 'CBX1', 'PFN1', 'TMEM97', 'WHSC1', 'NCAPG', 'CCDC34', 'MDK', 'C11orf58', 'CORO1C', 'PTGES3', 'FOXM1', 'RAD51AP1', 'TIMELESS', 'GAPDH', 'CHD4', 'TPI1', 'C12orf57', 'LDHB', 'SRSF9', 'FBXO5', 'SRSF3', 'MCM3', 'E2F3', 'GMNN', 'TTK', 'ERBB2IP', 'LMNB1', 'H2AFY', 'SMAD5', 'SMC4', 'TFDP2', 'HES1', 'ECT2', 'BBX', 'NCL', 'PPM1G', 'RPS15', 'FANCL', 'SRSF7', 'MSH6', 'SRSF4', 'IVNS1ABP', 'ACADM', 'PRDX1', 'CNN3', 'CENPF', 'RPA2', 'MESDC2', 'STAG1', 'CASP8AP2', 'HMGN3', 'RPN2', 'CCND2', 'CTNNAL1', 'WDR34', 'SET', 'CNTRL', 'FAM178A', 'HELLS', 'ENY2', 'MASTL', 'EXOSC8', 'EGR1', 'TMPO', 'NFYB', 'NCAPH', 'MND1', 'CCDC18', 'CBX3', 'HNRNPA2B1', 'WIPF3', 'NPY', 'ZWINT', 'CDKN2C', 'DDX39A', 'CENPK', 'NEUROD4', 'CDK2', 'TUBA1B', 'STIL', 'HJURP', 'BAZ2B', 'EXOSC9', 'CKS2', 'SNRPC', 'HIST1H1D', 'HIST1H1A', 'GLO1', 'DEK', 'SOX9', 'PPDPF', 'SNRPD2', 'SNRPB', 'MGME1', 'MCM8', 'HNRNPR', 'RALY', 'UBA2', 'DLGAP5', 'YEATS4', 'PIN1', 'HP1BP3', 'PKMYT1', 'PAICS', 'SPECC1', 'CALU', 'HAT1', 'DUT', 'FAM64A', 'ILF3', 'PARP2', 'MIS18BP1', 'SGOL1', 'GADD45G', 'LSM4', 'DNMT1', 'AKAP12', 'GINS2', 'PSMC3IP', 'TOP2A', 'RAN', 'PCNA', 'NES', 'NASP', 'MYH10', 'TPT1',
'RFC3', 'ANKRD32', 'LRRCC1', 'MEIS2', 'TMEM106C', 'RBM17', 'SYNCRIP', 'ATP5G2', 'CDK4', 'HNRNPA1', 'AHI1', 'DHX9', 'RNASEH2B', 'CKAP2', 'SCRN1', 'SRSF1', 'BRIP1', 'ACTL6A', 'TRA2B', 'SMC2', 'CDK5RAP2', 'ANP32B', 'RPL35', 'RPS6', 'GGH', 'RDX', 'CTDSPL2', 'NUSAP1', 'KIF23', 'CASC5', 'RPLP1', 'KIF11', 'KIF20B', 'DNA2', 'BARD1', 'PPIG', 'MNS1', 'ZGRF1', 'HNRNPD', '44450', 'CENPE', 'HADH', 'SCAF11', 'PHLDA1', 'SNRPF', 'NEDD1', 'ASCL1', 'BRCA2', 'DIAPH3', 'TMX1', 'SERF2', 'COMMD4', 'FANCI', 'MFGE8', 'ANAPC11', 'NFIC', 'SAE1', 'PLK4', 'ITGB3BP', 'KIF2C', 'NUF2', 'ANP32E', 'DTL', 'ILF2', 'SRP9', 'PARP1', 'LBR', 'SNRPG', 'SLC20A1', 'CDCA7', 'GULP1', 'HSPD1', 'HES6', 'FANCD2', 'CENPC', 'CCNA2', 'MYO10', 'G3BP1', 'PHIP', 'MMS22L', 'CDCA5', 'NCAPG2', 'NONO', 'RBMX', 'GINS4', 'PLIN2', 'HAUS6', 'RPL7A', 'ZEB1', 'MKI67', 'SSRP1', 'RPS3', 'INCENP', 'CHEK1', 'DSN1', 'HIRIP3', 'ITGB1', 'CCT5', 'MAGI1', 'NCAPD3', 'CENPU', 'CENPJ', 'SCHIP1', 'MZT2B', 'HAUS1', 'SPC25', 'TMEM123', 'HNRNPDL', 'CENPH', 'CARHSP1', 'SMARCA5', 'HNRNPU', 'SREK1', 'CHD1', 'BUB3', 'BTG3', 'DBI', 'TMEM237', 'VBP1', 'ATAD2', 'BUB1B', 'CCNB2', 'TMSB15A', 'EIF5B', 'MIS18A', 'C21orf58', 'PCNT', 'FDPS', 'IER2', 'RPL8', 'SRSF2', 'RACGAP1', 'SPC24', 'ASRGL1', 'MAGOH', 'RBBP4', 'NFIA', 'USP1', 'PEA15', 'KIAA1524', 'EOMES', 'SGOL2', 'GMPS', 'TOPBP1', 'KIF15', 'RFC4', 'SLBP', 'RNF168', 'H2AFZ', 'PGRMC2', 'HMGB2', 'MAD2L1', 'ANXA5', 'RHOBTB3', 'STK17A', 'PTTG1', 'CDCA7L', 'FABP5', 'RAD21', 'PSIP1', 'HNRNPK', 'MELK', 'SPTSSA', 'SKA3', 'LRR1', 'E2F7', 'PSMC3', 'CEP295', 'CKB', 'CENPN', 'MCM7', 'CENPV', 'B2M', 'FAM111A', 'KIAA0101', 'SNRPD1', 'ACAA2', 'RRM1', 'TPM4', 'CHAF1A', 'C19orf48', 'PRDX2', 'TK1', 'SRRM2', 'RPSA', 'PBK', 'RBPJ', 'GNG4', 'HIST1H1E', '44441', 'DTYMK', 'FEN1', 'STXBP6', 'HNRNPH1', 'SDC2', 'CKAP2L', 'BUB1', 'CNBP', 'HNRNPF', 'UBE2E3', 'KCNAB3', 'HNRNPA3', 'CDK1', 'UBB', 'FOS', 'EMX2', 'PA2G4', 'LSM3', 'SHCBP1', 'CHD7', 'ESCO2', 'CXXC5', 'RRM2', 'RPS7', 'ID4', 'CKS1B', 'INSM1', 'SMARCC1', 'GOLIM4', 'GNG5', 'EXO1', 'ZWILCH', 'LARP7', 'CEP135', 'RSRC1', 'UBE2C', 'CSRP2', 'CCNE2', 'BANF1', 'CCDC14', 'NR2F1', 'COX8A', 'TYMS', 'PXMP2', 'RPLP2', 'JUN', 'HNRNPA0', 'ARL6IP6', 'KDELC2', 'GEN1', 'SUZ12', 'RMI1', 'AURKB', 'RAD23A', 'SSTR2', 'NPM1', 'PENK', 'SOX2', 'ZBTB20', 'NEUROG1', 'SNRPE', 'RTKN2', 'IDH2', 'SKA2', 'HIST2H2AC', 'HIST1H1B', 'POU3F2', 'H1FX', 'NDUFA6', 'SIVA1', 'ZFP36L1', 'MYBL1', 'NKAIN3', '44449', 'NAP1L1', 'PTMA', 'HIST1H1C', 'TUBB4B', 'H2AFX', 'SUMO2', 'FAM111B', 'H1F0', 'HMGB1', 'PPIA', 'XRCC6', 'XRCC2', 'HIST1H4C', 'PCBP2', 'BLM', 'HNRNPAB', 'HES5', 'ELOVL2', 'PRIM1', 'HMGN5', 'RPL23A', 'ASPH', 'WDHD1', 'BAZ1A', 'SMOC1', 'ARHGAP11A', 'HMGN2', 'CCDC152', 'SMC5', 'PRC1', 'CCDC167', 'CENPW', 'GPANK1', 'NAP1L4', 'TMSB4X', 'HMGN1', 'HN1L', 'DNAJC9', 'MIR99AHG', 'CKLF', 'UBA52', 'FGD5-AS1', 'DHFR', 'RPL41', 'DLEU2', 'LINC01158', 'MAGI2-AS3', 'PEG10', 'SNHG6', 'TMEM158', 'PRKDC')
g2m.genes <- c('CDC27', 'DBF4', 'PAX6', 'SPAG9', 'NCAPD2', 'ANLN', 'BRCA1', 'TACC3', 'DEPDC1', 'VIM', 'HMGB3', 'DEPDC1B', 'MAT2B', 'SPDL1', 'PSMA4', 'CNTLN', 'TPR', 'SLC4A8', 'POLQ', 'MPHOSPH9', 'NNAT', 'SYNE2', 'CCAR1', 'COL11A1', 'QSER1', 'SPA17', 'SUGP2', 'HMG20B', 'ASPM', 'MPPED2', 'PRR11', 'LAPTM4A', 'NUCKS1', 'SMC1A', 'HMMR', 'NDE1', 'SRI', 'GTSE1', 'ACTB', 'SPAG5', 'UBE2T', 'JADE1', 'PPP2R5C', 'PCM1', 'SLC1A3', 'KIF22', 'NDC80', 'STK17B', 'XPO1', 'REST', 'SEPHS1', 'AURKA', 'AAMDC', 'TPX2', 'DYNLL1', 'KIF4A', 'ORC6', 'G2E3', 'PHGDH', 'EZR', 'CBX5', 'SUCO', 'HNRNPH3', 'IFT74', 'HNRNPM', 'RANBP1', 'RANGAP1', 'CDKN3', 'KIAA0586', 'DHRS7', 'CEP128', 'ERH', 'VRK1', 'EMC9', 'CDC25B', 'FAM83D', 'SMARCA1', 'CMC2', 'CEP152', 'OIP5', 'MYEF2', 'SFRP1', 'EEF1D', 'HNRNPUL1', 'CARD8', 'CDK6', 'PON2', 'PTN', 'H2AFV', 'HSPB1', 'PPP1R17', 'LSM5', 'EZH2', 'RHEB', 'SMC3', 'UBE2S', 'CBX1', 'NMU', 'NEIL3', 'WHSC1', 'NCAPG', 'CCDC34', 'MDK', 'CORO1C', 'ATP5B', 'PTGES3', 'FOXM1', 'RAD51AP1', 'CDKN1B', 'TIMELESS', 'MRPL51', 'CDCA3', 'FBXO5', 'SRSF3', 'GMNN', 'QKI', 'TTK', 'BRD8', 'KIF20A', 'LMNB1', 'H2AFY', 'SMC4', 'CEP70', 'TFDP2', 'HES1', 'ECT2', 'FXR1', 'CENPA', 'GCA', 'SFPQ', 'TTF2', 'CDC20', 'PRDX1', 'STMN1', 'NEK2', 'CENPF', 'TXNDC12', 'KIF14', 'HMGN3', 'FBXL5', 'CCND2',
'CNTRL', 'PHF19', 'CENPL', 'ENY2', 'EXOSC8', 'EGR1', 'TMPO', 'NCAPH', 'MND1', 'PSPC1', 'KIF18A', 'DESI2', 'GPSM2', 'ZC3H7A', 'CCDC18', 'CBX3', 'HNRNPA2B1', 'NPY', 'CALD1', 'ZWINT', 'CIT', 'CDKN2C', 'DDX39A', 'CENPK', 'NEUROD4', 'TUBA1B', 'STIL', 'HJURP', 'MORF4L2', 'CKS2', 'SNRPC', 'HIST1H1D', 'HIST1H1A', 'GLO1', 'DEK', 'MT2A', 'SOX9', 'MGME1', 'HNRNPR', 'NSRP1', 'DLGAP5', 'HP1BP3', 'KNSTRN', 'PALLD', 'FAM64A', 'MIS18BP1', 'SGOL1', 'AKAP12', 'TOP2A', 'DNAJB1', 'RAN', 'PCBD2', 'NES', 'MYH10', 'CCNA1', 'CCNB1', 'PSRC1', 'LDHA', 'CDCA8', 'AKIRIN2', 'TROAP', 'HNRNPA1', 'RNASEH2B', 'CKAP2', 'BORA', 'LMO7', 'SCRN1', 'IGF2BP3', 'CALCOCO2', 'DCAF7', 'ACTL6A', 'TRA2B', 'ODF2', 'SMC2', 'CDK5RAP2', 'ANP32B', 'DCTN3', 'ARHGEF39', 'RDX', 'NUSAP1', 'KIF23', 'CASC5', 'CENPO', 'KIF11', 'CEP55', 'KIF20B', 'BARD1', 'COX17', 'CENPE', 'PHLDA1', 'NEDD1', 'ASCL1', 'GAS2L3', 'BRCA2', 'TMBIM6', 'DIAPH3', 'TMX1', 'SERF2', 'PIF1', 'TICRR', 'PLK4', 'KIF2C', 'NUF2', 'HDGF', 'ANP32E', 'RAB13', 'ILF2', 'CNIH4', 'LBR', 'HNRNPLL', 'CALM2', 'SNRPG', 'CCDC150', 'HES6', 'FANCD2', 'CENPC', 'CCNA2', 'SFRP2', 'MYO10', 'G3BP1', 'PHIP', 'CDCA5', 'NCAPG2', 'RBMX', 'PLIN2', 'ZEB1', 'ADD3', 'MKI67', 'SESN3', 'INCENP', 'HIRIP3', 'CCT5', 'SCLT1', 'CENPU', 'CENPJ', 'MZT2B', 'SPC25', 'CENPH', 'CETN3', 'SMARCA5', 'HNRNPU', 'CEP112', 'ENAH', 'BUB3', 'BTG3', 'SKA1', 'DBI', 'TMEM237', 'VBP1', 'FBXO43', 'ATAD2', 'BUB1B', 'NRG1', 'CCNB2', 'TMSB15A', 'CDC25C', 'TAGLN2', 'MIS18A', 'PTMS', 'CALM3', 'C21orf58', 'PCNT', 'SRSF2', 'RACGAP1', 'SPC24', 'CCNF', 'ASRGL1', 'PPAP2B', 'NFIA', 'USP1', 'FUBP1', 'PEA15', 'NEUROD1', 'DCAF16', 'KIAA1524', 'EOMES', 'SGOL2', 'SMIM14', 'KIF15', 'H2AFZ', 'INTU', 'HMGB2', 'SAP30', 'MAD2L1', 'ANXA5', 'CEP44', 'ITGA2', 'STK17A', 'PTTG1', 'FABP5', 'RAD21', 'PSIP1', 'HNRNPK', 'MELK', 'SKA3', 'CEP295', 'IKBIP', 'CKB', 'CENPN', 'WEE1', 'HSP90B1', 'B2M', 'FAM111A', 'KIAA0101', 'PLK1', 'TPM4', 'TUBA1C', 'CTNNB1', 'PBK', 'HIST1H1E', 'DTYMK', 'HNRNPH1', 'CKAP2L', 'BUB1', 'DCXR', 'HNRNPA3', 'CDK1', 'UBB', 'FOS', 'EMX2', 'ARL6IP1', 'NUDCD2', 'KIF5B', 'SHCBP1', 'CHD7', 'ESCO2', 'ATF7IP', 'RHNO1', 'RRM2', 'ID4', 'ZNF24', 'DCP2', 'CKS1B', 'RNF26', 'FKBP2', 'GOLIM4', 'GNG5', 'LARP7', 'CEP135', 'RSRC1', 'UBE2C', 'CKAP5', 'BANF1', 'CCDC14', 'NR2F1', 'RUVBL1', 'TUBB6', 'ACBD7', 'COX8A', 'TYMS', 'TGIF1', 'JUN', 'HNRNPA0', 'C2orf69', 'LCORL', 'GEN1', 'SUZ12', 'APOLD1', 'AURKB', 'PENK', 'SOX2', 'ZBTB20', 'RTKN2', 'FIGN', 'KPNA2', 'CEP97', 'SKA2', 'CEP57L1', 'RUVBL2', 'PTTG1IP', 'SETD8', 'HIST1H1B', 'POU3F2', 'CDCA2', 'H1FX', 'RPS27L', 'UBALD2', 'PARPBP', 'ZFP36L1', 'MYBL1', 'NKAIN3', 'SAPCD2', 'PPP1CC', '44449', 'NAP1L1', 'HIST1H1C', 'ARHGAP11B', 'TUBB4B', 'H2AFX', 'HN1', 'HMGB1', 'XRCC6', 'XRCC2', 'ZMYM1', 'HIST1H4C', 'PCBP2', 'HES5', 'HMGN5', 'HSD17B11', 'HYLS1', 'ECI2', 'SMOC1', 'ARHGAP11A', 'HMGN2', 'CCDC152', 'TOP1', 'PRC1', 'CCDC167', 'CENPW', 'HSPA1B', 'HSPA1A', 'MZT1', 'TMSB4X', 'HMGN1', 'HLA-A', 'TRIM59', 'MXD3', 'MIR99AHG', 'DLEU2', 'LINC01158', 'CRNDE')
DefaultAssay(orgo_cirm43) <- 'GeneActivity'
orgo_cirm43 <- CellCycleScoring(orgo_cirm43, s.features = s.genes, g2m.features = g2m.genes, set.ident = F,search=T)
saveRDS(orgo_cirm43,file="orgo_cirm43.QC2.SeuratObject.Rds")
plt<-FeaturePlot(orgo_cirm43,features=c("G2M.Score","S.Score"),cols=c("lightgrey","red"),order=T,min.cutoff="q75")
ggsave(plt,file="cellcycle_score.pdf",width=20)
system("slack -F cellcycle_score.pdf ryan_todo")
summary(orgo_cirm43$G2M.Score)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
#-0.02677 0.01296 0.02688 0.02729 0.04103 0.11135
summary(orgo_cirm43$S.Score)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
#-0.02898 0.01271 0.02555 0.02580 0.03848 0.08810
https://www.cell.com/neuron/pdf/S0896-6273(19)30561-6.pdf Defined waves of transcription factors active is neocoritcal mid-gestation. Supplementary table 8 details this. We looked at these in both gene activity and transcription factor motif accessibility where available. The regulon is defined by the transcription factor (ChromVar Motif Score) and acts on the listed genes (Gene Activity Score). We can then relate these regulons within our subclusters to that of the human mid-gestational data (Supp table 9).
library(Seurat)
library(Signac)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
library(cicero)
library(SeuratWrappers)
library(ComplexHeatmap)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
library(parallel)
library(zoo)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds")
#TF Modules
tf_modules<-read.csv("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/PUBMED31303374.SuppTable8.tsv",sep="\t",head=T)
tf_modules<-tf_modules[,2:ncol(tf_modules)]
modules<-lapply(1:ncol(tf_modules),function(x) unlist(tf_modules[x]))
modules<-lapply(modules,function(x) x[x !=""])
names(modules)<-colnames(tf_modules)
#Unname genes in the vectors
modules<-lapply(modules,unname)
orgo_cirm43<-AddModuleScore(orgo_cirm43,features=modules,assay="GeneActivity",name=paste0("TF_",names(modules),"_"), search=T)
#Save them as a separate object rather than meta data
orgo_cirm43[['TF_modules']] <- CreateAssayObject(data = t(orgo_cirm43@meta.data[grepl("TF_",colnames(orgo_cirm43@meta.data))]))
orgo_cirm43@meta.data<-orgo_cirm43@meta.data[!grepl("TF_",colnames(orgo_cirm43@meta.data))]
saveRDS(orgo_cirm43,file="orgo_cirm43.QC2.SeuratObject.Rds")
#Generate Heatmap of TF ChromVar score and TF modules
modules<-as.data.frame(t(orgo_cirm43@assays$TF_modules@data))
module_tfs<-unlist(lapply(strsplit(colnames(modules),"-"),"[",2)) #set up TF names
tf_chrom<-orgo_cirm43@assays$chromvar@data #set up chromvar matrix
tfList <- getMatrixByID(JASPAR2020, ID=row.names(tf_chrom)) #correct names
tfList <-unlist(lapply(names(tfList), function(x) name(tfList[[x]])))
row.names(tf_chrom)<-tfList
tfList<-tfList[tfList %in% module_tfs]
tf_chrom<-as.data.frame(t(tf_chrom[tfList,]))
modules<-modules[module_tfs %in% colnames(tf_chrom)] #filter modules to chromvar matrix
colnames(modules)<-unlist(lapply(strsplit(colnames(modules),"-"),"[",2))
#Combine over subclusters
tf_chrom<-split(tf_chrom,orgo_cirm43$seurat_clusters) #group by rows to seurat clusters
tf_chrom<-lapply(tf_chrom,function(x) apply(x,2,median)) #take average across group
tf_chrom<-do.call("rbind",tf_chrom) #condense to smaller data frame
modules<-split(modules,paste0(orgo_cirm43$seurat_clusters)) #group by rows to seurat clusters
modules<-lapply(modules,function(x) apply(x,2,median)) #take average across group
modules<-do.call("rbind",modules) #condense to smaller data frame
modules<-t(scale(modules))
tf_chrom<-t(scale(tf_chrom))
#This col order to be checked
clus_order<-c("6","5","0","4","1","3","2")
tf_chrom<-tf_chrom[,match(clus_order,colnames(tf_chrom),nomatch=0)]
modules<-modules[,match(clus_order,colnames(modules),nomatch=0)]
plt1<-Heatmap(tf_chrom,column_order=1:ncol(tf_chrom))
plt2<-Heatmap(modules,column_order=1:ncol(modules))
pdf("tf.complexheatmap.pdf",height=20)
plt1+plt2
dev.off()
system("slack -F tf.complexheatmap.pdf ryan_todo")
#Generate a tanglegram to look at gene opening before or after chromvar activity
library(ggdendro)
library(dendextend)
tf_chrom_dend <- tf_chrom %>% dist("maximum") %>% hclust %>% as.dendrogram %>% ladderize %>% set("branches_k_color", k = 5)
modules_dend <- modules %>% dist("maximum") %>% hclust %>% as.dendrogram %>% ladderize %>% set("branches_k_color", k = 5)
tang<-tanglegram(tf_chrom_dend,modules_dend) %>% untangle(method = "ladderize")
pdf("tf.tangle.pdf",width=20)
tang %>% plot(main = paste("entanglement =", round(entanglement(tang), 2)))
dev.off()
system("slack -F tf.tangle.pdf ryan_todo")
library(JASPAR2020)
library(BSgenome.Hsapiens.UCSC.hg38)
library(Seurat)
library(Signac)
library(ggplot2)
library(ComplexHeatmap)
library(TFBSTools)
library(ggdendro)
library(dendextend)
library(parallel)
library(dplyr)
library(ggplot2)
library(ggrepel)
library(RColorBrewer)
library(viridis)
library(circlize)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds")
#define DA functions for parallelization
#Use LR test for atac data
da_one_v_rest<-function(i,obj,group,assay.="GeneActivity"){
da_ga_tmp <- FindMarkers(
object = obj,
ident.1 = i,
group.by = group,
test.use = 'LR',
latent.vars = 'nCount_peaks',
only.pos=T,
assay=assay.,
logfc.threshold=0
)
da_ga_tmp$da_region<-row.names(da_ga_tmp)
da_ga_tmp$enriched_group<-c(as.numeric(i)-1) #correct group name (levels vs. actual ID)
da_ga_tmp$compared_group<-c("all_other_cells")
return(da_ga_tmp)
}
#peaks
da_peaks_df<-lapply(unique(orgo_cirm43$seurat_clusters),function(x) da_one_v_rest(x,obj=orgo_cirm43,group="seurat_clusters",assay.="peaks"))
da_peaks_df<-do.call("rbind",da_peaks_df)
write.table(da_peaks_df,file="orgo_cirm43.onevrest.da_peaks.txt",sep="\t",col.names=T,row.names=T,quote=F)
system(paste0("slack -F ","orgo_cirm43.onevrest.da_peaks.txt"," ryan_todo"))
#gene activity
da_ga_df<-lapply(unique(orgo_cirm43$seurat_clusters),function(x) da_one_v_rest(x,obj=orgo_cirm43,group="seurat_clusters",assay.="GeneActivity"))
da_ga_df<-do.call("rbind",da_ga_df)
write.table(da_ga_df,file="orgo_cirm43.onevrest.da_ga.txt",sep="\t",col.names=T,row.names=T,quote=F)
system(paste0("slack -F ","orgo_cirm43.onevrest.da_ga.txt"," ryan_todo"))
#now doing chromvar
da_ga_df<-lapply(unique(orgo_cirm43$seurat_clusters),function(x) da_one_v_rest(x,obj=orgo_cirm43,group="seurat_clusters",assay.="chromvar"))
da_ga_df<-do.call("rbind",da_ga_df)
da_ga_df$TF_name<-ConvertMotifID(orgo_cirm43, id = da_ga_df$da_region)
write.table(da_ga_df,file="orgo_cirm43.onevrest.da_chromvar.txt",sep="\t",col.names=T,row.names=T,quote=F)
system(paste0("slack -F ","orgo_cirm43.onevrest.da_chromvar.txt"," ryan_todo"))
##now doing bhaduri
#da_ga_df<-lapply(unique(orgo_cirm43$seurat_clusters),function(x) da_one_v_rest(x,obj=orgo_cirm43,group="seurat_clusters",assay.="Bhaduri_modules"))
#da_ga_df<-do.call("rbind",da_ga_df)
#write.table(da_ga_df,file="orgo_cirm43.onevrest.da_eigengenes.txt",sep="\t",col.names=T,row.names=T,quote=F)
#now doing tf modules
da_ga_df<-lapply(unique(orgo_cirm43$seurat_clusters),function(x) da_one_v_rest(x,obj=orgo_cirm43,group="seurat_clusters",assay.="TF_modules"))
da_ga_df<-do.call("rbind",da_ga_df)
write.table(da_ga_df,file="orgo_cirm43.onevrest.da_tfmodules.txt",sep="\t",col.names=T,row.names=T,quote=F)
#mkdir GREAT_analysis
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
library(GenomicRanges)
orgo_cirm43<-readRDS("orgo_cirm43.QC.SeuratObject.Rds")
#To perform GREAT on peaks for enrichment per cluster
write("Performing GREAT on all enriched sites per annotation group", stderr())
library(rGREAT)
#format data as bed file all seurat objects have the same peak list
write("Preparing Background Set as all called peaks.", stderr())
orgo_bg_bed<-do.call("rbind",strsplit(unlist(orgo_cirm43@assays$peaks@counts@Dimnames[1]),"[-]"))
orgo_bg_bed<-as.data.frame(orgo_bg_bed)
colnames(orgo_bg_bed)<-c("chr","start","end")
orgo_bg_bed$start<-as.numeric(as.character(orgo_bg_bed$start))
orgo_bg_bed$end<-as.numeric(as.character(orgo_bg_bed$end))
orgo_bg_bed<-makeGRangesFromDataFrame(orgo_bg_bed)
cirm43_da_peaks<-read.table("orgo_cirm43.onevrest.da_peaks.txt",header=T)
dir.create("./GREAT_analysis.QC2")
write("Beginning loop through all annotation groups.", stderr())
great_processing<-function(enriched_group_input,peak_dataframe,prefix,bg){
#subset bed file to peaks enriched in input group
orgo_bed<-as.data.frame(do.call("rbind",strsplit(peak_dataframe[peak_dataframe$enriched_group==enriched_group_input,]$da_region,"-")))
colnames(orgo_bed)<-c("chr","start","end")
orgo_bed$start<-as.numeric(as.character(orgo_bed$start))
orgo_bed$end<-as.numeric(as.character(orgo_bed$end))
nrow(orgo_bed)
orgo_bed<-orgo_bed[!duplicated(orgo_bed),]
row_count<-nrow(orgo_bed)
orgo_bed$width<-orgo_bed$end-orgo_bed$start
orgo_bed<-makeGRangesFromDataFrame(orgo_bed)
#run GREAT using all peaks as background
write(paste("Using",row_count, "DA peaks from",enriched_group_input), stderr())
job = submitGreatJob(orgo_bed,bg=bg,species="hg38",request_interval=30)
tb = getEnrichmentTables(job, ontology = c("GO Molecular Function", "GO Biological Process","GO Cellular Component"))
tb = getEnrichmentTables(job, category = c("GO","Phenotype","Genes"))
#Plot gene association
pdf(paste0("./GREAT_analysis/",prefix,"_DApeaks_",enriched_group_input,".GeneAssociation.pdf"))
plotRegionGeneAssociationGraphs(job)
dev.off()
for (j in 1:length(names(tb))){
write(paste("Outputting DA GREAT Analysis for", enriched_group_input, as.character(names(tb))[j]), stderr())
tabl_name<-gsub(" ","",as.character(names(tb))[j])
write.table(as.data.frame(tb[[j]]),file=paste0("./GREAT_analysis.QC2/",prefix,"_DApeaks_",enriched_group_input,".",tabl_name,".txt"),sep="\t",col.names=T,row.names=T,quote=F)
}
}
library(parallel)
mclapply(unique(cirm43_da_peaks$enriched_group), FUN=function(x){great_processing(enriched_group_input=x,peak_dataframe=cirm43_da_peaks,prefix="cirm43",bg=orgo_bg_bed)},mc.cores=7)
library(JASPAR2020)
library(BSgenome.Hsapiens.UCSC.hg38)
library(Signac)
library(Seurat)
library(ggplot2)
library(ComplexHeatmap)
library(ggdendro)
library(dendextend)
library(parallel)
library(dplyr)
library(ggplot2)
library(ggrepel)
library(RColorBrewer)
library(viridis)
library(circlize)
library(TFBSTools)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
dat<-readRDS("orgo_cirm43.QC.SeuratObject.Rds")
#Plot out top ga for each cluster
da_ga<-read.csv(file="orgo_cirm43.onevrest.da_ga.txt",sep="\t")
da_ga$gene_name<-da_ga$da_region
da_ga<-da_ga[complete.cases(da_ga),]
da_ga$label<-""
for (x in unique(da_ga$enriched_group)){
selc_genes<-as.data.frame(da_ga %>% filter(enriched_group==x) %>% arrange(rev(desc(p_val_adj))) %>% slice(1:8))$da_region
da_ga[da_ga$da_region %in% selc_genes & da_ga$enriched_group==x,]$label<- da_ga[da_ga$da_region %in% selc_genes & da_ga$enriched_group==x,]$da_region
}
#Get gene activity scores data frame to summarize over subclusters (limit to handful of marker genes)
dat_ga<-as.data.frame(t(as.data.frame(dat[["GeneActivity"]]@data)))
sum_ga<-split(dat_ga,dat$seurat_clusters) #group by rows to seurat clusters
sum_ga<-lapply(sum_ga,function(x) apply(x,2,mean)) #take average across group
sum_ga<-do.call("rbind",sum_ga) #condense to smaller data frame
sum_ga<-t(scale(sum_ga))
#cluster by all marker genes
sum_da_dend <- t(sum_ga) %>% dist() %>% hclust %>% as.dendrogram %>% ladderize %>% set("branches_k_color", k = 1:3)
saveRDS(sum_da_dend,file="orgo_cirm43.geneactivity.dend.rds")
sum_ga<-sum_ga[row.names(sum_ga) %in% unique(da_ga$label),]
#annot<-hg38_atac@meta.data[,c("celltype","cluster_ID","subcluster_col","cluster_col","seurat_clusters","seurat_subcluster","celltype_col")]
#annot<-annot[!(annot$subcluster_col=="NA"),]
#annot<-annot[!duplicated(annot$cluster_ID),]
#annot<-annot[annot$cluster_ID %in% colnames(sum_ga),]
#annot<-annot[match(colnames(sum_ga),annot$cluster_ID),]
sum_ga_plot<-t(sum_ga)
#annot_clus_col<-annot[!duplicated(annot$cluster_ID),]
#side_ha<-rowAnnotation(df= data.frame(celltype=annot$celltype, cluster=annot$seurat_clusters, subcluster=annot$cluster_ID),
# col=list(
# celltype=setNames(unique(annot$celltype_col),unique(annot$celltype)),
# cluster=setNames(unique(annot$cluster_col),unique(as.character(annot$seurat_clusters))),
# subcluster=setNames(annot_clus_col$subcluster_col,annot_clus_col$cluster_ID) #due to nonunique colors present
# ))
#bottom_ha<-columnAnnotation(foo = anno_mark(at = 1:ncol(sum_ga_plot), labels = colnames(sum_ga_plot)))
colfun=colorRamp2(quantile(unlist(sum_ga_plot), probs=c(0.5,0.90,0.95)),magma(3))
plt1<-Heatmap(sum_ga_plot,
cluster_rows=sum_da_dend,
#left_annotation=side_ha,
col=colfun,
#bottom_annotation=bottom_ha,
column_names_gp = gpar(fontsize = 8),
row_names_gp=gpar(fontsize=7),
column_names_rot=90
)
pdf("orgo_cirm43.geneactivity.heatmap.pdf",height=20,width=20)
plt1
dev.off()
system("slack -F orgo_cirm43.geneactivity.heatmap.pdf ryan_todo")
########################Plot out top TF for each cluster###################
da_tf<-read.csv(file="orgo_cirm43.onevrest.da_chromvar.txt",sep="\t")
da_tf$tf_name <- unlist(lapply(unlist(lapply(da_tf$da_region, function(x) getMatrixByID(JASPAR2020,ID=x))),function(y) name(y)))
da_tf<-da_tf[complete.cases(da_tf),]
da_tf$label<-""
for (x in unique(da_tf$enriched_group)){
selc_genes<-head(as.data.frame(as.data.frame(da_tf) %>% filter(enriched_group==x) %>% arrange(rev(desc(p_val_adj)))),n=8)$tf_name
da_tf[da_tf$tf_name %in% selc_genes & da_tf$enriched_group==x,]$label<- da_tf[da_tf$tf_name %in% selc_genes & da_tf$enriched_group==x,]$tf_name
}
#Get gene activity scores data frame to summarize over subclusters (limit to handful of marker genes)
dat_tf<-as.data.frame(t(as.data.frame(dat[["chromvar"]]@data)))
sum_tf<-split(dat_tf,dat$seurat_clusters) #group by rows to seurat clusters
sum_tf<-lapply(sum_tf,function(x) apply(x,2,mean)) #take average across group
sum_tf<-do.call("rbind",sum_tf) #condense to smaller data frame
sum_tf<-t(scale(sum_tf))
sum_tf<-sum_tf[,!endsWith(colnames(sum_tf),"NA")] #remove NA (doublet cells)
#clustered by all marker genes ga
sum_da_dend<-readRDS(file="orgo_cirm43.geneactivity.dend.rds")
sum_tf<-sum_tf[row.names(sum_tf) %in% unique(da_tf[da_tf$label!="",]$da_region),]
row.names(sum_tf)<-da_tf[match(row.names(sum_tf),da_tf$da_region,nomatch=0),]$tf_name
#annot<-hg38_atac@meta.data[,c("celltype","cluster_ID","subcluster_col","cluster_col","seurat_clusters","seurat_subcluster","celltype_col")]
#annot<-annot[!(annot$subcluster_col=="NA"),]
#annot<-annot[!duplicated(annot$cluster_ID),]
#annot<-annot[annot$cluster_ID %in% colnames(sum_tf),]
#annot<-annot[match(colnames(sum_tf),annot$cluster_ID),]
sum_tf_plot<-t(sum_tf)
#annot_clus_col<-annot[!duplicated(annot$cluster_ID),]
#ide_ha<-rowAnnotation(df= data.frame(celltype=annot$celltype, cluster=annot$seurat_clusters, subcluster=annot$cluster_ID),
# col=list(
# celltype=setNames(unique(annot$celltype_col),unique(annot$celltype)),
# cluster=setNames(unique(annot$cluster_col),unique(as.character(annot$seurat_clusters))),
# subcluster=setNames(annot_clus_col$subcluster_col,annot_clus_col$cluster_ID) #due to nonunique colors present
# ))
#bottom_ha<-columnAnnotation(foo = anno_mark(at = 1:ncol(sum_tf_plot), labels = colnames(sum_tf_plot)))
colfun=colorRamp2(quantile(unlist(sum_tf_plot), probs=c(0.5,0.90,0.95)),cividis(3))
plt1<-Heatmap(sum_tf_plot,
cluster_rows=sum_da_dend,
# left_annotation=side_ha,
col=colfun,
#bottom_annotation=bottom_ha,
column_names_gp = gpar(fontsize = 8),
row_names_gp=gpar(fontsize=7),
column_names_rot=90
)
plt1<-draw(plt1)
pdf("orgo_cirm43.tf.heatmap.pdf",height=20,width=20)
draw(plt1)
dev.off()
system("slack -F orgo_cirm43.tf.heatmap.pdf ryan_todo")
#Plot motifs alongside chromvar plot
library(ggplot2)
library(patchwork)
motif_order<-names(dat@assays$peaks@motifs@motif.names[match(colnames(sum_tf_plot)[column_order(plt1)],unlist(dat@assays$peaks@motifs@motif.names),nomatch=0)])
plt<-MotifPlot(object = dat,motifs = motif_order,ncol=1)+theme_void()+theme(strip.text = element_blank())
ggsave(plt,file="orgo_cirm43.tf.heatmap.motif.pdf",height=100,width=2,limitsize=F)
system("slack -F orgo_cirm43.tf.heatmap.motif.pdf ryan_todo")
library(Signac)
library(Seurat)
library(ComplexHeatmap)
library(dplyr)
library(circlize)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds") #reading in QC passing cells
DefaultAssay(orgo_cirm43)<-"peaks"
da_peaks_df<-read.table(file="orgo_cirm43.onevrest.da_peaks.txt",sep="\t")
da_peaks_df<-da_peaks_df %>% filter(p_val_adj<=0.05)
i="peaks"
pseudobulk=AggregateExpression(orgo_cirm43,return.seurat=FALSE,slot="counts",assays=i) #aggregate by idents
pseudobulk[[1]]<-pseudobulk[[1]][row.names(pseudobulk[[1]]) %in% row.names(da_peaks_df),]
col_fun = colorRamp2(c(0.5, 1), c("white", "black"))
cor_out=cor(pseudobulk[[1]],method="spearman")
plt<-Heatmap(cor_out,col_fun)
pdf(paste0("cluster_correlation_heatmap.",i,".pdf"))
plt
dev.off()
system(paste0("slack -F cluster_correlation_heatmap.",i,".pdf ryan_todo"))
library(Signac)
library(Seurat)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds") #reading in QC passing cells
DefaultAssay(orgo_cirm43)<-"peaks"
umap_dat <- RunUMAP(object = orgo_cirm43, n.components=3,reduction = 'cistopic', dims = 1:ncol(orgo_cirm43@reductions$cistopic))
umap_dat<-as.data.frame(umap_dat@reductions$umap@cell.embeddings)
umap_dat$cluster<-orgo_cirm43@meta.data[match(row.names(umap_dat),row.names(orgo_cirm43@meta.data)),]$postqc_clusters
#check to make sure the cluster assignment is correct
library(ggplot2)
plt<-ggplot(umap_dat,aes(x=UMAP_1,y=UMAP_2,color=cluster))+geom_point()
ggsave(plt,file="3dplot.test.pdf")
system("slack -F 3dplot.test.pdf ryan_todo")
#now just assign colors, and fit blender python script format
umap_dat$cluster_color<-"NA"
umap_dat[umap_dat$cluster=="0",]$cluster_color="#ff9933"
umap_dat[umap_dat$cluster=="1",]$cluster_color="#336666"
umap_dat[umap_dat$cluster=="2",]$cluster_color="#6666cc"
umap_dat[umap_dat$cluster=="3",]$cluster_color="#6699cc"
umap_dat[umap_dat$cluster=="4",]$cluster_color="#99cc99"
umap_dat[umap_dat$cluster=="5",]$cluster_color="#ffff66"
umap_dat[umap_dat$cluster=="6",]$cluster_color="#cc6699"
umap_dat$cellID<-row.names(umap_dat)
umap_dat<-umap_dat[,c(4,6,1,2,3,5)]
write.table(umap_dat,file="orgo_postqc_blender.table",row.names=F,col.names=F,sep="\t",quote=F)
system("slack -F orgo_postqc_blender.table ryan_todo")
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
library(dplyr)
library(patchwork)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
library(rGREAT)
library(parallel)
library(chromVAR)
library(TFBSTools)
library(JASPAR2020)
library(ggrepel)
orgo_cirm43<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds") #reading in QC passing cells
#define DA functions for parallelization
#Use LR test for atac data
differentiation_differences<-function(i,obj.=orgo_cirm43,group,assay.="peaks"){
Idents(obj.)<-obj.$seurat_clusters
obj<-subset(obj.,postqc_clusters==i)
Idents(obj)<-obj$differentiation_exp
da_peaks_tmp <- FindMarkers(
object = obj,
ident.1 = "5", #differentiation exp 5
group.by = group,
test.use = 'LR',
latent.vars = 'nCount_peaks',
only.pos=T,
assay=assay.,
logfc.threshold=0
)
da_peaks_tmp$da_region<-row.names(da_peaks_tmp)
closest_genes <- ClosestFeature(obj,da_peaks_tmp$da_region)
da_peaks_tmp<-cbind(da_peaks_tmp,closest_genes)
da_peaks_tmp$enriched_group<-c("5")
da_peaks_tmp$cluster<-c(i)
da_peaks_tmp$compared_group<-c("all_other_cells")
return(da_peaks_tmp)
}
out<-lapply(unique(orgo_cirm43$postqc_clusters),function(x) differentiation_differences(obj.=orgo_cirm43,i=x,group="differentiation_exp"))
out2<-do.call("rbind",out)
write.table(out2,file="differentiation_exp_DA_peaks.tsv",sep="\t",quote=F)
out2<-out2[out2$p_val_adj<=0.05,]
table(out2$cluster)
# 1 2 3 4 5 6
#14 1 11 21 1 7
plot_da_plots<-function(x=orgo_cirm43,i=0,assay_name="peaks",outname="differentiaion_exp",logfc.threshold_set=0,min.pct_set=0,latent.vars="NA"){
Idents(x)<-x$seurat_clusters
obj<-subset(x,postqc_clusters==i)
Idents(obj)<-obj$differentiation_exp
if(!(latent.vars=="NA")){
x_da<-FindMarkers(object=obj,ident.1 = "5", ident.2="7", test.use = 'LR', logfc.threshold=logfc.threshold_set,latent.vars = latent.vars, only.pos=F, assay=assay_name,min.pct=min.pct_set)
} else {
x_da<-FindMarkers(object=obj, ident.1 = "5", ident.2 = "7", test.use = 'LR', logfc.threshold=logfc.threshold_set, only.pos=F, assay=assay_name,min.pct=min.pct_set)
}
#if statements to handle formating the different modalities
if(assay_name=="chromvar"){ #make chromvar names readable
print("Translating Chromvar Motif Names for Plot")
x_da$out_name <- unlist(lapply(unlist(lapply(row.names(x_da), function(x) getMatrixByID(JASPAR2020,ID=x))),function(y) name(y)))
} else{
x_da$out_name<-row.names(x_da)
}
if(assay_name=="peaks"){ #assign peak output to closest features
closest_genes <- ClosestFeature(x,row.names(x_da))
x_da <-cbind(x_da ,closest_genes)
}
x_da$sig<-ifelse(x_da$p_val_adj<=0.05,"sig","non_sig")
x_da$label<-""
if(assay_name!="peaks"){ #ignore adding peaks names (too many), and add top 10 significant names to either side of comparison
ident_1_labels<- row.names(head(x_da[which((x_da$sig=="sig") & (x_da$avg_log2FC>0)),] ,n=10))#significant, is ident1 specific, is top 10
ident_2_labels<- row.names(head(x_da[which((x_da$sig=="sig") & (x_da$avg_log2FC<0)),] ,n=10))#significant, is ident1 specific, is top 10
x_da[c(ident_1_labels,ident_2_labels),]$label<-x_da[c(ident_1_labels,ident_2_labels),]$out_name
#trim cistrome cell line from name (for readability)
}
x_scale<-max(abs(x_da$avg_log2FC))*1.1 #find proper scaling for x axis
plt<-ggplot(x_da,aes(x=avg_log2FC,y=-log10(p_val_adj),color=sig,label=label,alpha=0.1))+
geom_point(size=0.5)+
theme_bw()+
scale_fill_manual(values=c("non_sig"="#999999", "sig"="#FF0000"))+
xlim(c(-x_scale,x_scale))+
geom_vline(xintercept=0)+
geom_hline(yintercept=-log10(0.05))+
geom_text_repel(size=2,segment.size=0.1,max.overlaps=Inf,min.segment.length = 0, nudge_y = 1, segment.angle = 20,color="black") +
theme(legend.position = "none",axis.title.x = element_blank(),axis.title.y = element_blank())
ggsave(plt,file=paste0(outname,"_",assay_name,"_",i,"_da.pdf"),width=2,units="in",height=3)
system(paste0("slack -F ",outname,"_",assay_name,"_",i,"_da.pdf"," ryan_todo"))
write.table(x_da,file=paste0(outname,"_",assay_name,"_",i,"_da.markers.txt"),col.names=T,sep="\t")
system(paste0("slack -F ",paste0(outname,"_",assay_name,"_",i,"_da.markers.txt")," ryan_todo"))
}
lapply(unique(orgo_cirm43$postqc_clusters),function(x) plot_da_plots(x=orgo_cirm43,
i=x,
assay_name="chromvar",
outname="differentiation_exp",
logfc.threshold_set=0,
min.pct_set=0))
lapply(unique(orgo_cirm43$postqc_clusters),function(x) plot_da_plots(x=orgo_cirm43,
i=x,
assay_name="peaks",
outname="differentiation_exp",
logfc.threshold_set=0,
min.pct_set=0,
latent.vars="nCount_peaks"))
out<-lapply(unique(orgo_cirm43$postqc_clusters),function(x) differentiation_differences(obj.=orgo_cirm43,i=x,group="differentiation_exp"))
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
library(cisTopic)
library(JASPAR2020)
library(TFBSTools)
library(grid)
library(dplyr)
library(parallel)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
library(motifmatchr)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds") #reading in QC passing cells
dat<-subset(orgo_cirm43,DIV %in% c("30","60","90"))
dat<-subset(dat,seurat_clusters %in% c("2","1","3"))
cistopic_processing<-function(seurat_input,prefix){
cistopic_counts_frmt<-seurat_input$peaks@counts #grabbing counts matrices
row.names(cistopic_counts_frmt)<-sub("-", ":", row.names(cistopic_counts_frmt)) #renaming row names to fit granges expectation of format
atac_cistopic<-cisTopic::createcisTopicObject(cistopic_counts_frmt) #set up CisTopicObjects
#Run warp LDA on objects
atac_cistopic_models<-cisTopic::runWarpLDAModels(atac_cistopic,topic=c(20:30),nCores=11,addModels=FALSE)
#Setting up topic count selection
pdf(paste(prefix,"model_selection.pdf",sep="."))
par(mfrow=c(1,3))
cirm43_cistopic_models <- selectModel(atac_cistopic_models, type='derivative')
dev.off()
system(paste0("slack -F ",paste(prefix,"model_selection.pdf",sep=".")," ryan_todo"))
print("Saving cistopic models.")
saveRDS(atac_cistopic_models,file=paste(prefix,"CisTopicObject.Rds",sep="."))
}
cistopic_processing(seurat_input=dat,prefix="orgo_cirm43.QC2.ExN")
cistopic_models<-readRDS("orgo_cirm43.QC2.ExN.CisTopicObject.Rds")
#set topics based on derivative
cisTopicObject<-cisTopic::selectModel(cistopic_models,type="derivative",keepModels=T)
#saving model selected RDS
saveRDS(cisTopicObject,file="orgo_cirm43.QC2.ExN.CisTopicObject.Rds")
####Function to include topics and umap in seurat object
cistopic_wrapper<-function(object_input=orgo_atac,cisTopicObject=orgo_cisTopicObject,resolution=0.8){
#run UMAP on topics
topic_df<-as.data.frame(cisTopicObject@selected.model$document_expects)
row.names(topic_df)<-paste0("Topic_",row.names(topic_df))
dims<-as.data.frame(uwot::umap(t(topic_df),n_components=2))
row.names(dims)<-colnames(topic_df)
colnames(dims)<-c("x","y")
dims$cellID<-row.names(dims)
dims<-merge(dims,object_input@meta.data,by.x="cellID",by.y="row.names")
#Add cell embeddings into seurat
cell_embeddings<-as.data.frame(cisTopicObject@selected.model$document_expects)
colnames(cell_embeddings)<-cisTopicObject@cell.names
n_topics<-nrow(cell_embeddings)
row.names(cell_embeddings)<-paste0("topic_",1:n_topics)
cell_embeddings<-as.data.frame(t(cell_embeddings))
#Add feature loadings into seurat
feature_loadings<-as.data.frame(cisTopicObject@selected.model$topics)
row.names(feature_loadings)<-paste0("topic_",1:n_topics)
feature_loadings<-as.data.frame(t(feature_loadings))
#combined cistopic results (cistopic loadings and umap with seurat object)
cistopic_obj<-CreateDimReducObject(embeddings=as.matrix(cell_embeddings),loadings=as.matrix(feature_loadings),assay="peaks",key="topic_")
umap_dims<-as.data.frame(as.matrix(dims[2:3]))
colnames(umap_dims)<-c("UMAP_1","UMAP_2")
row.names(umap_dims)<-dims$cellID
cistopic_umap<-CreateDimReducObject(embeddings=as.matrix(umap_dims),assay="peaks",key="UMAP_")
object_input@reductions$cistopic<-cistopic_obj
object_input@reductions$umap<-cistopic_umap
n_topics<-ncol(Embeddings(object_input,reduction="cistopic"))
object_input <- FindNeighbors(
object = object_input,
reduction = 'cistopic',
dims = 1:n_topics
)
object_input <- FindClusters(
object = object_input,
verbose = TRUE,
resolution=resolution
)
return(object_input)}
DefaultAssay(dat)<-"peaks"
dat<-cistopic_wrapper(object_input=dat,cisTopicObject=cisTopicObject,resolution=0.2)
for (i in c("DIV","seurat_clusters")){
plt<-DimPlot(dat,group.by=i)
ggsave(plt,file=paste0("ExN.qc.",i,"umap.pdf"),height=5,width=5)
system(paste0("slack -F ",paste0("ExN.qc.",i,"umap.pdf")," ryan_todo"))}
for (i in c("tss_enrichment","FRIP","log10_uniq_reads")){
plt<-FeaturePlot(dat,feature=i,order=T)
ggsave(plt,file=paste0("ExN.qc.",i,"umap.pdf"),height=5,width=5)
system(paste0("slack -F ",paste0("ExN.qc.",i,"umap.pdf")," ryan_todo"))}
saveRDS(dat,file="orgo_cirm43.QC2.ExN.SeuratObject.Rds") ###save Seurat file
dat<-readRDS("orgo_cirm43.QC2.ExN.SeuratObject.Rds")
da_one_v_rest<-function(i,obj,group,assay.="peaks"){
da_peaks_tmp <- FindMarkers(
object = obj,
ident.1 = i,
group.by = group,
test.use = 'LR',
latent.vars = 'nCount_peaks',
only.pos=T,
assay=assay.,
logfc.threshold=0.1
)
da_peaks_tmp$da_region<-row.names(da_peaks_tmp)
if(assay.=="peaks"){
closest_genes <- ClosestFeature(obj,da_peaks_tmp$da_region)
da_peaks_tmp<-cbind(da_peaks_tmp,closest_genes)}
if(assay.=="chromvar"){
tfList <- getMatrixByID(JASPAR2020, ID=row.names(da_peaks_tmp)) #Assign human readable TF motif names
tfList <-unlist(lapply(names(tfList), function(x) name(tfList[[x]])))
row.names(da_peaks_tmp)<-tfList}
da_peaks_tmp$enriched_group<-c(i)
da_peaks_tmp$compared_group<-c("all_other_cells")
return(da_peaks_tmp)
}
#Perform parallel application of DA test
n.cores=length(unique(dat$seurat_clusters))
dat_da_peaks<-mclapply(
unique(dat$seurat_clusters),
FUN=da_one_v_rest,
obj=dat,
group="seurat_clusters",
mc.cores=n.cores)
#Merge the final data frame from the list for 1vrest DA
dat_da_peaks<-do.call("rbind",dat_da_peaks)
dat_da_peaks$enriched_group<-dat_da_peaks$enriched_group-1#correct enriched group numbering
write("Outputting One v Rest DA Table.", stderr())
write.table(dat_da_peaks,file="orgo_cirm43.QC2.ExN.da_peaks.txt",sep="\t",col.names=T,row.names=T,quote=F)
as.data.frame(dat_da_peaks %>% group_by(enriched_group) %>% filter(p_val_adj<0.05) %>% arrange(p_val_adj))$gene_name
#Perform parallel application of DA test for gene activity
n.cores=length(unique(dat$seurat_clusters))
dat_da_ga<-mclapply(
unique(dat$seurat_clusters),
FUN=da_one_v_rest,
obj=dat,
group="seurat_clusters",
assay.="GeneActivity",
mc.cores=n.cores)
#Merge the final data frame from the list for 1vrest DA
dat_da_ga<-do.call("rbind",dat_da_ga)
dat_da_ga$enriched_group<-dat_da_ga$enriched_group-1#correct enriched group numbering
as.data.frame(dat_da_ga %>% group_by(enriched_group) %>% filter(p_val_adj<0.05) %>% arrange(p_val_adj))$da_region
write("Outputting One v Rest DA Table.", stderr())
write.table(dat_da_ga,file="orgo_cirm43.QC2.ExN.da_geneactivity.txt",sep="\t",col.names=T,row.names=T,quote=F)
dat_da_ga<-read.table(file="orgo_cirm43.QC2.ExN.da_geneactivity.txt",sep="\t",col.names=T,row.names=1)
#Perform parallel application of DA test for chromvar
n.cores=length(unique(dat$seurat_clusters))
dat_da_chrom<-mclapply(
unique(dat$seurat_clusters),
FUN=da_one_v_rest,
obj=dat,
group="seurat_clusters",
assay.="chromvar",
mc.cores=n.cores)
#Merge the final data frame from the list for 1vrest DA
dat_da_chrom<-do.call("rbind",dat_da_chrom)
dat_da_chrom$enriched_group<-dat_da_chrom$enriched_group-1#correct enriched group numbering
as.data.frame(dat_da_chrom %>% group_by(enriched_group) %>% filter(p_val_adj<0.05) %>% arrange(p_val_adj))$da_region
write("Outputting One v Rest DA Table.", stderr())
write.table(dat_da_chrom,file="orgo_cirm43.QC2.ExN.da_chromvar.txt",sep="\t",col.names=T,row.names=T,quote=F)
library(JASPAR2020)
library(BSgenome.Hsapiens.UCSC.hg38)
library(Signac)
library(Seurat)
library(ggplot2)
library(ComplexHeatmap)
library(ggdendro)
library(dendextend)
library(parallel)
library(dplyr)
library(ggplot2)
library(ggrepel)
library(RColorBrewer)
library(viridis)
library(circlize)
library(TFBSTools)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
dat<-readRDS("orgo_cirm43.QC2.ExN.SeuratObject.Rds")
#Plot out top ga for each cluster
da_ga<-read.csv(file="orgo_cirm43.QC2.ExN.da_geneactivity.txt",sep="\t")
da_ga$gene_name<-da_ga$da_region
da_ga<-da_ga[complete.cases(da_ga),]
da_ga$label<-""
for (x in unique(da_ga$enriched_group)){
selc_genes<-as.data.frame(da_ga %>% filter(enriched_group==x) %>% arrange(rev(desc(p_val_adj))) %>% slice(1:8))$da_region
da_ga[da_ga$da_region %in% selc_genes & da_ga$enriched_group==x,]$label<- da_ga[da_ga$da_region %in% selc_genes & da_ga$enriched_group==x,]$da_region
}
#Get gene activity scores data frame to summarize over subclusters (limit to handful of marker genes)
dat_ga<-as.data.frame(t(as.data.frame(dat[["GeneActivity"]]@data)))
sum_ga<-split(dat_ga,dat$seurat_clusters) #group by rows to seurat clusters
sum_ga<-lapply(sum_ga,function(x) apply(x,2,mean)) #take average across group
sum_ga<-do.call("rbind",sum_ga) #condense to smaller data frame
sum_ga<-t(scale(sum_ga))
#cluster by all marker genes
sum_da_dend <- t(sum_ga) %>% dist() %>% hclust %>% as.dendrogram %>% ladderize %>% set("branches_k_color", k = 1:3)
saveRDS(sum_da_dend,file="ExN.geneactivity.dend.rds")
sum_ga<-sum_ga[row.names(sum_ga) %in% unique(da_ga$label),]
#annot<-hg38_atac@meta.data[,c("celltype","cluster_ID","subcluster_col","cluster_col","seurat_clusters","seurat_subcluster","celltype_col")]
#annot<-annot[!(annot$subcluster_col=="NA"),]
#annot<-annot[!duplicated(annot$cluster_ID),]
#annot<-annot[annot$cluster_ID %in% colnames(sum_ga),]
#annot<-annot[match(colnames(sum_ga),annot$cluster_ID),]
sum_ga_plot<-t(sum_ga)
#annot_clus_col<-annot[!duplicated(annot$cluster_ID),]
#side_ha<-rowAnnotation(df= data.frame(celltype=annot$celltype, cluster=annot$seurat_clusters, subcluster=annot$cluster_ID),
# col=list(
# celltype=setNames(unique(annot$celltype_col),unique(annot$celltype)),
# cluster=setNames(unique(annot$cluster_col),unique(as.character(annot$seurat_clusters))),
# subcluster=setNames(annot_clus_col$subcluster_col,annot_clus_col$cluster_ID) #due to nonunique colors present
# ))
#bottom_ha<-columnAnnotation(foo = anno_mark(at = 1:ncol(sum_ga_plot), labels = colnames(sum_ga_plot)))
colfun=colorRamp2(quantile(unlist(sum_ga_plot), probs=c(0.5,0.90,0.95)),magma(3))
plt1<-Heatmap(sum_ga_plot,
cluster_rows=sum_da_dend,
#left_annotation=side_ha,
col=colfun,
#bottom_annotation=bottom_ha,
column_names_gp = gpar(fontsize = 8),
row_names_gp=gpar(fontsize=7),
column_names_rot=90
)
pdf("ExN.geneactivity.heatmap.pdf",height=20,width=20)
plt1
dev.off()
system("slack -F ExN.geneactivity.heatmap.pdf ryan_todo")
########################Plot out top TF for each cluster###################
da_tf<-read.csv(file="orgo_cirm43.QC2.ExN.da_chromvar.txt",sep="\t")
da_tf$tf_name<-row.names(da_tf)
da_tf<-da_tf[complete.cases(da_tf),]
da_tf$label<-""
for (x in unique(da_tf$enriched_group)){
selc_genes<-head(as.data.frame(as.data.frame(da_tf) %>% filter(enriched_group==x) %>% arrange(rev(desc(p_val_adj)))),n=8)$tf_name
da_tf[da_tf$tf_name %in% selc_genes & da_tf$enriched_group==x,]$label<- da_tf[da_tf$tf_name %in% selc_genes & da_tf$enriched_group==x,]$tf_name
}
#Get gene activity scores data frame to summarize over subclusters (limit to handful of marker genes)
dat_tf<-as.data.frame(t(as.data.frame(dat[["chromvar"]]@data)))
sum_tf<-split(dat_tf,dat$seurat_clusters) #group by rows to seurat clusters
sum_tf<-lapply(sum_tf,function(x) apply(x,2,mean)) #take average across group
sum_tf<-do.call("rbind",sum_tf) #condense to smaller data frame
sum_tf<-t(scale(sum_tf))
sum_tf<-sum_tf[,!endsWith(colnames(sum_tf),"NA")] #remove NA (doublet cells)
#clustered by all marker genes ga
sum_da_dend<-readRDS(file="ExN.geneactivity.dend.rds")
sum_tf<-sum_tf[row.names(sum_tf) %in% unique(da_tf[da_tf$label!="",]$da_region),]
row.names(sum_tf)<-da_tf[match(row.names(sum_tf),da_tf$da_region,nomatch=0),]$tf_name
#annot<-hg38_atac@meta.data[,c("celltype","cluster_ID","subcluster_col","cluster_col","seurat_clusters","seurat_subcluster","celltype_col")]
#annot<-annot[!(annot$subcluster_col=="NA"),]
#annot<-annot[!duplicated(annot$cluster_ID),]
#annot<-annot[annot$cluster_ID %in% colnames(sum_tf),]
#annot<-annot[match(colnames(sum_tf),annot$cluster_ID),]
sum_tf_plot<-t(sum_tf)
#annot_clus_col<-annot[!duplicated(annot$cluster_ID),]
#ide_ha<-rowAnnotation(df= data.frame(celltype=annot$celltype, cluster=annot$seurat_clusters, subcluster=annot$cluster_ID),
# col=list(
# celltype=setNames(unique(annot$celltype_col),unique(annot$celltype)),
# cluster=setNames(unique(annot$cluster_col),unique(as.character(annot$seurat_clusters))),
# subcluster=setNames(annot_clus_col$subcluster_col,annot_clus_col$cluster_ID) #due to nonunique colors present
# ))
#bottom_ha<-columnAnnotation(foo = anno_mark(at = 1:ncol(sum_tf_plot), labels = colnames(sum_tf_plot)))
colfun=colorRamp2(quantile(unlist(sum_tf_plot), probs=c(0.5,0.90,0.95)),cividis(3))
plt1<-Heatmap(sum_tf_plot,
cluster_rows=sum_da_dend,
# left_annotation=side_ha,
col=colfun,
#bottom_annotation=bottom_ha,
column_names_gp = gpar(fontsize = 8),
row_names_gp=gpar(fontsize=7),
column_names_rot=90
)
plt1<-draw(plt1)
pdf("ExN.tf.heatmap.pdf",height=20,width=20)
draw(plt1)
dev.off()
system("slack -F ExN.tf.heatmap.pdf ryan_todo")
#Plot motifs alongside chromvar plot
library(ggplot2)
library(patchwork)
motif_order<-names(dat@assays$peaks@motifs@motif.names[match(colnames(sum_tf_plot)[column_order(plt1)],unlist(dat@assays$peaks@motifs@motif.names),nomatch=0)])
plt<-MotifPlot(object = dat,motifs = motif_order,ncol=1)+theme_void()+theme(strip.text = element_blank())
ggsave(plt,file="ExN.tf.heatmap.motif.pdf",height=100,width=2,limitsize=F)
system("slack -F ExN.tf.heatmap.motif.pdf ryan_todo")
library(Signac)
library(Seurat)
library(SeuratWrappers)
library(ggplot2)
library(patchwork)
library(monocle3,lib.loc="/home/groups/oroaklab/src/R/R-4.0.0/library/") #using old install of monocle, just need for as.cell_data_set conversion
library(cicero,lib.loc="/home/groups/oroaklab/src/R/R-4.0.0/library/") #and using old version of cicero
library(EnsDb.Hsapiens.v86)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
dat<-readRDS("orgo_cirm43.QC2.ExN.SeuratObject.Rds")
#Cicero processing function
cicero_processing<-function(object_input=orgo_atac,prefix="orgo_atac"){
#Generate CDS format from Seurat object
atac.cds <- as.cell_data_set(object_input,assay="peaks",reduction="umap")
# convert to CellDataSet format and make the cicero object
print("Making Cicero format CDS file")
atac.cicero <- make_cicero_cds(atac.cds, reduced_coordinates = reducedDims(atac.cds)$UMAP)
saveRDS(atac.cicero,paste(prefix,"atac_cicero_cds.Rds",sep="_"))
genome <- seqlengths(object_input) # get the chromosome sizes from the Seurat object
genome.df <- data.frame("chr" = names(genome), "length" = genome) # convert chromosome sizes to a dataframe
print("Running Cicero to generate connections.")
conns <- run_cicero(atac.cicero, genomic_coords = genome.df) # run cicero
saveRDS(conns,paste(prefix,"atac_cicero_conns.Rds",sep="_"))
print("Generating CCANs")
ccans <- generate_ccans(conns) # generate ccans
saveRDS(ccans,paste(prefix,"atac_cicero_ccans.Rds",sep="_"))
print("Adding CCAN links into Seurat Object and Returning.")
links <- ConnectionsToLinks(conns = conns, ccans = ccans) #Add connections back to Seurat object as links
Links(object_input) <- links
return(object_input)
}
for(i in unique(dat$seurat_clusters)){
dat_subset<-subset(dat,seurat_clusters==i)
dat_subset-cicero_processing(object_input=dat_subset,prefix=paste0("orgo_cirm43.QC2.ExN.",i))
saveRDS(dat_subset,paste0("orgo_cirm43.QC2.ExN.",i,".SeuratObject.unnormGA.Rds"))
}
library(Rmagic)
library(Signac)
library(Seurat)
library(ggplot2)
library(patchwork)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
dat<-readRDS("orgo_cirm43.QC2.ExN.SeuratObject.Rds")
gene_list=c("SOX2","PAX6","NHLH1","NEUROD2","MEF2C","TBR1","FOXG1","EOMES","DLX5","LHX9","BCL11B","CUX1","POU3F2")
DefaultAssay(dat)<-"GeneActivity"
dat<-magic(dat,genes=gene_list,n.jobs=6)
DefaultAssay(dat)<-"MAGIC_GeneActivity"
plt<-FeaturePlot(dat,features=gene_list,order=T,cols=c("white","red"))
ggsave(plt,file="test.magic.pdf")
system("slack -F test.magic.pdf ryan_todo")
chromvar_genes<-c("MA0143.4","MA0069.1","MA0048.2","MA0668.1","MA0497.1","MA0802.1","MA0613.1","MA0800.1","MA1476.1","MA0754.1","MA0787.1")
ga_genes<-c("SOX2","PAX6","NHLH1","NEUROD2","MEF2C","TBR1","FOXG1","EOMES","DLX5","CUX1","POU3F2")
DefaultAssay(dat)<-"MAGIC_GeneActivity"
for ( i in 1:length(ga_genes)){
plt<-FeaturePlot(dat,features=c(ga_genes[i],chromvar_genes[i]),order=T, blend=TRUE,cols=c("white","red","blue"))+ggtitle(ga_genes[i])
ggsave(plt,file=paste0(ga_genes[i],".magic.pdf"),width=20)
system(paste0("slack -F ",paste0(ga_genes[i],".magic.pdf")," ryan_todo"))
}
Adding feature overlap with Trevino hSS and hCS data to differentiate subpallial and cortical projecting neurons
https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE132403 Dataset 12 middle hSS vs hCS peaks
library(Seurat)
library(Signac)
library(data.table)
library(cisTopic)
library(GenomicRanges)
library(AUCell)
library(org.Hs.eg.db)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(ComplexHeatmap)
library(rtracklayer)
library(parallel)
library(ggplot2)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data")
#Set up DA peaks of hSS and hCS from bulk atac seq public data
trevino<-read.table("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/trevino_scATAC_GSE123403/GSE132403_Data_12_middle_hSS_vs_hCS.tsv",header=T)
trevino<-trevino[!duplicated(trevino$region.name),]
trevino_hSS<-trevino[trevino$log2FoldChange>0,]
trevino_hCS<-trevino[trevino$log2FoldChange<0,]
trevino_hCS<-trevino_hCS[complete.cases(trevino_hCS),]
trevino_hSS<-trevino_hSS[complete.cases(trevino_hSS),]
write.table(trevino_hSS,"/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/trevino_scATAC_GSE123403/GSE132403_trevino_hSS.Data12.bed",sep="\t",quote=F,col.names=F,row.names=F)
write.table(trevino_hCS,"/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/trevino_scATAC_GSE123403/GSE132403_trevino_hCS.Data12.bed",sep="\t",quote=F,col.names=F,row.names=F)
trevino_dir<-"/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/trevino_scATAC_GSE123403"
hg38_chip<-paste(trevino_dir, list.files(trevino_dir,pattern=".bed"), sep='/') #grab all files in directory
labels<-c("trevino_hCS","trevino_hSS")
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
obj<-readRDS("orgo_cirm43.ExN.SeuratObject.Rds")
cisTopicObject<-readRDS(file="orgo_cirm43.ExN.CisTopicObject.Rds")
outname="orgo_cirm43.ExN"
cisTopicObject<-addCellMetadata(cisTopicObject, cell.data =obj@meta.data) #Add signatures via cistopic
#region score association
pred.matrix <- predictiveDistribution(cisTopicObject)
cisTopicObject <- getSignaturesRegions(cisTopicObject, hg38_chip, minOverlap = 0.01, labels=labels) #output new bed files to be read in and processed
aucellRankings <- AUCell_buildRankings(pred.matrix, plot=FALSE, verbose=FALSE) # Compute cell rankings
cisTopicObject <- signatureCellEnrichment(cisTopicObject, aucellRankings, selected.signatures='all', aucMaxRank = 0.1*nrow(aucellRankings),plot=FALSE) # Check signature enrichment in cells
cisTopicObject <- getRegionsScores(cisTopicObject, method='NormTop', scale=TRUE)
cisTopicObject <- binarizecisTopics(cisTopicObject, thrP=0.999, plot=TRUE)
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
cisTopicObject <- annotateRegions(cisTopicObject, txdb=TxDb.Hsapiens.UCSC.hg38.knownGene, annoDb='org.Hs.eg.db')
cisTopicObject <- runtSNE(cisTopicObject, target='region', perplexity=200, check_duplicates=FALSE) #cluster by regions
saveRDS(cisTopicObject,paste0(outname,".CisTopicObject.Rds"))
#save cistopic region data
outdat<-lapply(cisTopicObject@binarized.cisTopics, function(x) {
temp<-merge(x,cisTopicObject@region.data,by="row.names")
temp$topic_assigned<-names(x)
return(temp)})
outdat<-data.table::rbindlist(outdat,use.names=FALSE)
write.table(outdat,file=paste0(outname,".topic_region_assignment.txt"),col.names=T,row.names=T,quote=F,sep="\t")
system(paste("slack -F ",paste0(outname,".topic_region_assignment.txt")," ryan_todo" ))
pdf(paste0(outname,".topic_annotations.pdf"))
par(mfrow=c(1,1))
signaturesHeatmap(cisTopicObject, selected.signatures = 'annotation')
plotFeatures(cisTopicObject, method='tSNE', target='region', topic_contr=NULL, colorBy=c('annotation'), cex.legend = 0.8, factor.max=.75, dim=2, legend=TRUE, intervals=20)
dev.off()
system(paste("slack -F ",paste0(outname,".topic_annotations.pdf")," ryan_todo" ))
pdf(paste0(outname,".topic_scores.pdf"))
par(mfrow=c(4,4))
plotFeatures(cisTopicObject, method='Umap', target='cell', topic_contr='Probability', colorBy=NULL, cex.legend = 0.8, factor.max=.75, dim=2, legend=TRUE)
dev.off()
system(paste("slack -F ",paste0(outname,".topic_scores.pdf")," ryan_todo" ))
#To HERE
obj[['trevino_score']] <- CreateAssayObject(
data = t(cisTopicObject@cell.data
))
saveRDS(obj,file=paste0(outname,".SeuratObject.rds"))
plt1<-FeaturePlot(obj,features=c("trevino-hCS","trevino-hSS"),order=T)
plt2<-FeaturePlot(obj,features=c("NEUROD6","LHX6","DLX5"),order=T)
ggsave(plt1/plt2,file=paste0(outname,".trevino_features.pdf"))
system(paste0("slack -F ",paste0(outname,".trevino_features.pdf")," ryan_todo"))
library(Seurat)
library(Signac)
library(patchwork)
library(ggplot2)
#Adding Ziffra Data to this label transfer via Gene Activity Scores (ATAC Data)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
obj<-readRDS("orgo_cirm43.ExN.SeuratObject.Rds")
#generate LSI matrix for normalization
obj <- RunTFIDF(obj)
obj <- FindTopFeatures(obj, min.cutoff = 'q0')
obj <- RunSVD(obj)
ziffra<-readRDS("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/Public_Data/ziffra/ziffra.SeuratObject.Rds")
DefaultAssay(ziffra)<-"Ziffra_GeneActivity"
Idents(ziffra)<-ziffra$CellType
ziffra<-NormalizeData(ziffra)
ziffra<- FindVariableFeatures(ziffra)
transfer.anchors <- FindTransferAnchors(
reference = ziffra,
reference.assay="Ziffra_GeneActivity",
query = obj,
query.assay="GeneActivity",
reduction = "cca",
features=VariableFeatures(ziffra),
verbose=T)
saveRDS(transfer.anchors,"orgo_cirm43.ExN.ziffra.transferanchors.rds")
transfer.anchors<-readRDS("orgo_cirm43.ExN.ziffra.transferanchors.rds")
obj[["celltype_prediction_ziffra"]]<- TransferData(
anchorset = transfer.anchors,
refdata = ziffra$CellType,
weight.reduction = "cca",
dims = 1:30,
prediction.assay=T
)
imputation<- TransferData(
anchorset = transfer.anchors,
refdata = GetAssayData(ziffra, assay = "Ziffra_GeneActivity", slot = "data")[VariableFeatures(ziffra),],
weight.reduction = obj[["lsi"]],
dims=1:30)
obj[["Ziffra_GeneActivity"]]<-imputation
coembed<-merge(x=ziffra,y=obj)
coembed <- ScaleData(coembed, features = VariableFeatures(ziffra), do.scale = FALSE)
coembed <- RunPCA(coembed, features = VariableFeatures(ziffra), verbose = FALSE)
coembed <- RunUMAP(coembed, dims = 1:30)
plt<-DimPlot(coembed, group.by = c("orig.ident", "CellType","seurat_clusters"))
ggsave(plt,file="orgo_cirm43.ExN.coembed.ziffra.umap.pdf",width=30,height=30,limitsize=F)
system("slack -F orgo_cirm43.ExN.coembed.ziffra.umap.pdf ryan_todo")
saveRDS(obj,file="orgo_cirm43.ExN.QC.integrated.SeuratObject.Rds")
plt1<-DimPlot(orgo_cirm43,group.by="seurat_clusters")
plt2<-FeaturePlot(orgo_cirm43,features=c("ulEN","dlEN","AstroOligo","earlyEN","IPC","IN-CGE","RG","IN-MGE","Insular-Neurons","IPC"),pt.size=0.1,min.cutoff="q75",order=T,col=c("white","black"))
plt<-plt1/plt2
ggsave(plt,file="orgo_cirm43.ExN.predictedid.ziffra.umap.png",width=20,height=30,limitsize=F)
ggsave(plt,file="orgo_cirm43.ExN.predictedid.ziffra.umap.pdf",width=20,height=30,limitsize=F)
system("slack -F orgo_cirm43.ExN.predictedid.ziffra.umap.png ryan_todo")
da_one_v_one<-function(i,obj,group,j_list,assay.="peaks"){
i<-as.character(i)
da_tmp_2<-list()
for (j in j_list){
if ( i != j){
da_peaks_tmp <- FindMarkers(
object = obj,
ident.1 = i,
ident.2 = j,
group.by = group,
test.use = 'LR',
latent.vars = 'nCount_peaks',
only.pos=T,
assay=assay.,
logfc.threshold=0.1
)
da_peaks_tmp$da_region<-row.names(da_peaks_tmp)
closest_genes <- ClosestFeature(obj,da_peaks_tmp$da_region)
da_peaks_tmp<-cbind(da_peaks_tmp,closest_genes)
da_peaks_tmp$enriched_group<-c(i)
da_peaks_tmp$compared_group<-c(j)
da_tmp_2[[paste(i,j)]]<-da_peaks_tmp
}
}
return(da_tmp_2)
}
#split cluster 2
dat_clus2<-subset(dat,seurat_clusters=="2")
cistopic_processing(seurat_input=dat_clus2,prefix="orgo_cirm43.ExN.clus2")
cistopic_models<-readRDS("orgo_cirm43.ExN.clus2.CisTopicObject.Rds")
cisTopicObject<-cisTopic::selectModel(cistopic_models,type="derivative",keepModels=T)
dat_clus2<-cistopic_wrapper(object_input=dat_clus2,cisTopicObject=cisTopicObject,resolution=0.1)
plt<-DimPlot(dat_clus2,group.by=c("DIV","seurat_clusters"))
ggsave(plt,file="ExN.clus2.qc.umap.pdf")
system("slack -F ExN.clus2.qc.umap.pdf ryan_todo")
#for cluster 2 discrimination
n.cores=2
datclus2_da_peaks<-mclapply(
unique(dat_clus2$seurat_clusters),
FUN=da_one_v_one,
obj=dat_clus2,
group="seurat_clusters",
j_list=do.call("as.character",list(unique(dat_clus2$seurat_clusters))),
mc.cores=n.cores)
#Merge the final data frame from the list for 1v1 DA
datclus2_da_peaks<-do.call("rbind",do.call("rbind",datclus2_da_peaks))
write("Outputting One v One DA Table.", stderr())
write.table( datclus2_da_peaks,file="ExN.clus2.onevone.da_peaks.txt",sep="\t",col.names=T,row.names=T,quote=F)
system("slack -F ExN.clus2.onevone.da_peaks.txt ryan_todo")
To view data in an interactive way, I converted the final seurat object for UCSCs cell browser.
I then hosted locally to test. Install cellbrowser via this (guide.)[https://cellbrowser.readthedocs.io/en/master/installation.html]
#Downloaded seurat objects off clusters
#Prepare seurat data as cellbrowser file
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
library(Signac)
library(Seurat)
library(Matrix)
library(R.utils)
library(dplyr)
#modified function from https://github.com/satijalab/seurat-wrappers/blob/master/R/cellbrowser.R just for export
ExportToCellbrowser <- function(
object,
reduction="umap",
assay.name="chromvar",
dir,
dataset.name,
marker.file,
marker.n=5,
cluster.field="seurat_clusters",
gene_list,
metadata_columns,
skip.expr.mat=FALSE) {
reducNames = reduction
# see https://satijalab.org/seurat/essential_commands.html
idents <- Idents(object = object)
cellOrder <- row.names(object@meta.data)
counts <- object@assays[[assay.name]]@data
counts<-counts[,cellOrder]
genes <- rownames(x = counts)
dr<-object@reductions[[reduction]]
meta.fields<-object@meta.data
if (!dir.exists(paths = dir)) {
dir.create(path = dir)}
# Export expression matrix
# we have to write the matrix to an mtx file
matrixPath <- file.path(dir, "matrix.mtx")
genesPath <- file.path(dir, "features.tsv")
barcodesPath <- file.path(dir, "barcodes.tsv")
if(!skip.expr.mat){
message("Writing expression matrix to ", matrixPath)
message("This may take a couple of minutes...")
writeMM(counts, matrixPath)
# easier to load if the genes file has at least two columns. Even though seurat objects
write.table(as.data.frame(cbind(rownames(counts), rownames(counts))), file=genesPath, sep="\t", row.names=F, col.names=F, quote=F)
write(colnames(counts), file = barcodesPath)
message("Gzipping expression matrix")
gzip(matrixPath,overwrite=T,remove=T)
gzip(genesPath,overwrite=T,remove=T)
gzip(barcodesPath,overwrite=T,remove=T)
}
matrixOutPath <- "matrix.mtx.gz"
#Export embeddings
df <- dr@cell.embeddings
if (ncol(x = df) > 2) {
warning('Embedding ', embedding, ' has more than 2 coordinates, taking only the first 2')
df <- df[, 1:2]
}
colnames(x = df) <- c("x", "y")
df <- data.frame(cellId = rownames(x = df), df, check.names = FALSE)
fname <- file.path(dir,"umap.coords.tsv")
message("Writing embeddings to ", fname)
write.table(df[cellOrder, ], sep="\t", file=fname, quote = FALSE, row.names = FALSE)
embeddings.conf <- sprintf('{"file": "%s.coords.tsv", "shortLabel": "Seurat %1$s"}',reduction)
coords.string <- paste0("[",paste(embeddings.conf, collapse = ",\n"),"]")
# Export metadata
enum.fields=colnames(object@meta.data)[colnames(object@meta.data) %in% metadata_columns]
fname <- file.path(dir, "meta.tsv")
message("Writing meta data to ", fname)
out.meta<-as.matrix(object@meta.data[cellOrder, enum.fields])
write.table(out.meta, sep = "\t", file = fname, quote = FALSE, row.names = FALSE)
enum.string <- paste0("[",paste(paste0('"', enum.fields, '"'), collapse = ", "), "]")
# Export markers
file <- file.path("markers.tsv")
fname <- file.path(dir, file)
da_ga<-read.table(file=marker.file)
da_ga$label<-""
for (x in unique(da_ga$enriched_group)){
selc_genes<-as.data.frame(da_ga %>% filter(enriched_group==x) %>% dplyr::arrange(p_val_adj) %>% dplyr::slice(1:marker.n))$da_region
da_ga[da_ga$da_region %in% selc_genes & da_ga$enriched_group==x,]$label<- da_ga[da_ga$da_region %in% selc_genes & da_ga$enriched_group==x,]$da_region
}
da_ga<-da_ga[da_ga$label!="",]
message("Writing top ", marker.n, " cluster markers to ", fname)
da_ga$cluster<-da_ga$enriched_group
da_ga<-da_ga[c("cluster","label","p_val_adj","enriched_group")]
colnames(da_ga)<-c("cluster","symbol","Adjusted p Value","Cluster Enrichment")
write.table(x = da_ga, file = fname, quote = FALSE, sep = "\t", col.names = T,row.names=F)
markers.string <- sprintf('markers = [{"file": "%s", "shortLabel": "Seurat Cluster Markers"}]',file)
#Export Gene list
file <- file.path("quickGenes.csv")
fname <- file.path(dir, file)
write.table(gene_list,sep=",",file = fname, quote = FALSE, row.names = FALSE)
config <- '
# This is a bare-bones cellbrowser config file auto-generated from R.
# Look at https://github.com/maximilianh/cellBrowser/blob/master/src/cbPyLib/cellbrowser/sampleConfig/cellbrowser.conf
# for a full file that shows all possible options
name="%s"
shortLabel="%1$s"
exprMatrix="%s"
tags = ["sciatac"]
meta="meta.tsv"
# possible values: "gencode-human", "gencode-mouse", "symbol" or "auto"
geneIdType="auto"
# file with gene,description (one per line) with highlighted genes, called "Dataset Genes" in the user interface
quickGenesFile="quickGenes.csv"
clusterField="%s"
labelField="%s"
enumFields=%s
%s
coords=%s
geneLabel="Gene Activity"
unit="GA"'
config <- sprintf(
config,
dataset.name,
matrixOutPath,
cluster.field,
cluster.field,
enum.string,
markers.string,
coords.string
)
confPath = file.path(dir, "cellbrowser.conf")
message("Writing cellbrowser config to ", confPath)
cat(config, file = confPath)
message("Prepared cellbrowser directory ", dir)
}
dat<-readRDS(file="orgo_cirm43.ExN.SeuratObject.Rds")
DefaultAssay(dat)<-"peaks"
hg38_marker_list<- c()
ExportToCellbrowser(
object=dat,
reduction="umap",
assay.name="chromvar",
dir="ExN_cb_chromvar",
dataset.name="ExN",
marker.file="orgo_cirm43.ExN.da_peaks.txt",
marker.n=5,
cluster.field="seurat_clusters",
gene_list=hg38_marker_list,
metadata_columns=c("cellID","uniq_orgID","differentiation_exp","DIV","seurat_clusters"),
skip.expr.mat=FALSE)
Locally building and hosting the cell browser. Using windows wsl2 ubuntu environment
#install cellbrowser via conda
#conda install -c bioconda ucsc-cell-browser
cd /mnt/c/Users/mulqueen/Documents/organoid/ExN_cb
cbBuild -o ExN_cellbrowser -p 8888
#Go to http://localhost:8888/ to use the interactive viewer
Running radial glia (RG) like and excitatory neuron like (ExN) cells serparately. This is to build a more biologically meaninful trajectory for cells. Performing both with and without cluster 4 (discontiguous)
Rerunning with PHATE to better preserve trajectory https://github.com/KrishnaswamyLab/phateR pip install –user phate install.packages(“phateR”)
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
library(cisTopic)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(JASPAR2020)
library(TFBSTools)
library(grid)
library(dplyr)
library(parallel)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
library(motifmatchr)
library(phateR)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
orgo_cirm43<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds") #reading in QC passing cells
orgo_cirm43<-subset(orgo_cirm43,DIV %in% c("30","60","90"))
cistopic_processing<-function(clusters=c("5","4","0"),prefix="orgo_cirm43.RG.540"){
dat<-subset(orgo_cirm43,postqc_clusters %in% clusters) #4 is excluded as suspected IPC #5is excluded as stem like
dat$original_clusters<-dat$postqc_clusters
seurat_input<-dat
cistopic_counts_frmt<-seurat_input$peaks@counts #grabbing counts matrices
row.names(cistopic_counts_frmt)<-sub("-", ":", row.names(cistopic_counts_frmt)) #renaming row names to fit granges expectation of format
atac_cistopic<-cisTopic::createcisTopicObject(cistopic_counts_frmt) #set up CisTopicObjects
#Run warp LDA on objects
atac_cistopic_models<-cisTopic::runWarpLDAModels(atac_cistopic,topic=c(20:30),nCores=11,addModels=FALSE)
#Setting up topic count selection
pdf(paste(prefix,"model_selection.pdf",sep="."))
par(mfrow=c(1,3))
cirm43_cistopic_models <- selectModel(atac_cistopic_models, type='derivative')
dev.off()
system(paste0("slack -F ",paste(prefix,"model_selection.pdf",sep=".")," ryan_todo"))
print("Saving cistopic models.")
#set topics based on derivative
cisTopicObject<-cisTopic::selectModel(atac_cistopic_models,type="derivative",keepModels=T)
saveRDS(atac_cistopic_models,file=paste(prefix,"CisTopicObject.Rds",sep="."))
return(dat)
}
####Function to include topics and umap in seurat object
cistopic_wrapper<-function(object_input,resolution=0.8,outname){#,use_phate_dim_reduc=FALSE
prefix=outname
cisTopicObject<-readRDS(file=paste0(prefix,".CisTopicObject.Rds"))
obj<-object_input
cisTopicObject<-addCellMetadata(cisTopicObject, cell.data =obj@meta.data) #Add signatures via cistopic
#run UMAP on topics
topic_df<-as.data.frame(cisTopicObject@selected.model$document_expects)
row.names(topic_df)<-paste0("Topic_",row.names(topic_df))
#if(!use_phate_dim_reduc){
dims<-as.data.frame(uwot::umap(t(topic_df),n_components=2))
#} else {
#dims<-as.data.frame(phate(t(obj@assays$peaks@data))) #run on peaks or cistopic object??
#}
row.names(dims)<-colnames(topic_df)
colnames(dims)<-c("x","y")
dims$cellID<-row.names(dims)
dims<-merge(dims,object_input@meta.data,by.x="cellID",by.y="row.names")
#Add cell embeddings into seurat
cell_embeddings<-as.data.frame(cisTopicObject@selected.model$document_expects)
colnames(cell_embeddings)<-cisTopicObject@cell.names
n_topics<-nrow(cell_embeddings)
row.names(cell_embeddings)<-paste0("topic_",1:n_topics)
cell_embeddings<-as.data.frame(t(cell_embeddings))
#Add feature loadings into seurat
feature_loadings<-as.data.frame(cisTopicObject@selected.model$topics)
row.names(feature_loadings)<-paste0("topic_",1:n_topics)
feature_loadings<-as.data.frame(t(feature_loadings))
#combined cistopic results (cistopic loadings and umap with seurat object)
cistopic_obj<-CreateDimReducObject(embeddings=as.matrix(cell_embeddings),loadings=as.matrix(feature_loadings),assay="peaks",key="topic_")
#if(!use_phate_dim_reduc){
dimrec_dims<-as.data.frame(as.matrix(dims[2:3]))
colnames(dimrec_dims)<-c("UMAP_1","UMAP_2")
row.names(dimrec_dims)<-dims$cellID
#} else {
#dimrec_dims<-as.data.frame(as.matrix(dims[,c("x","y")]))
#colnames(dimrec_dims)<-c("PHATE_1","PHATE_2")
#}
#if(!use_phate_dim_reduc){
cistopic_dimrec<-CreateDimReducObject(embeddings=as.matrix(dimrec_dims),assay="peaks",key="UMAP_")
#} else {
#cistopic_dimrec<-CreateDimReducObject(embeddings=as.matrix(dimrec_dims),assay="peaks",key="PHATE_")
#}
object_input@reductions$cistopic<-cistopic_obj
object_input@reductions$umap<-cistopic_dimrec
n_topics<-ncol(Embeddings(object_input,reduction="cistopic"))
object_input <- FindNeighbors(
object = object_input,
reduction = 'cistopic',
dims = 1:n_topics
)
object_input <- FindClusters(
object = object_input,
verbose = TRUE,
resolution=resolution
)
for(i in c("DIV","postqc_clusters","original_clusters")){
plt<-DimPlot(object_input,group.by=i)
ggsave(plt,file=paste0(outname,".",i,"umap.pdf"),width=15)
system(paste0("slack -F ",paste0(outname,".",i,"umap.pdf")," ryan_todo"))
}
return(object_input)}
da_one_v_one<-function(obj,cluster,group,i,j,assay.="peaks"){
obj<-subset(obj,postqc_clusters==cluster)
da_peaks_tmp <- FindMarkers(
object = obj,
ident.1 = i,
ident.2 = j,
group.by = group,
test.use = 'LR',
latent.vars = 'nCount_peaks',
only.pos=F,
assay=assay.,
logfc.threshold=0.1
)
if(assay.=="peaks"){
da_peaks_tmp$da_region<-row.names(da_peaks_tmp)
closest_genes <- ClosestFeature(obj,da_peaks_tmp$da_region)
da_peaks_tmp<-cbind(da_peaks_tmp,closest_genes)
}
if(assay.=="chromvar"){
da_peaks_tmp$da_region<-ConvertMotifID(obj,id=row.names(da_peaks_tmp))
}
da_peaks_tmp$enriched_group<-c(i)
da_peaks_tmp$compared_group<-c(j)
return(da_peaks_tmp)
}
dat<-cistopic_processing(clusters=c("5","4","0"),prefix="orgo_cirm43.RG.540")
DefaultAssay(dat)<-"peaks"
dat<-cistopic_wrapper(object_input=dat,resolution=0.2,outname="orgo_cirm43.RG.540")
saveRDS(dat,file="orgo_cirm43.RG.540.SeuratObject.Rds") ###save Seurat file
#dat<-cistopic_processing(clusters=c("5","0"),prefix="orgo_cirm43.RG.50")
#DefaultAssay(dat)<-"peaks"
#dat<-cistopic_wrapper(object_input=dat,resolution=0.2,outname="orgo_cirm43.RG.50")
#saveRDS(dat,file="orgo_cirm43.RG.50.SeuratObject.Rds") ###save Seurat file
dat<-readRDS(file="orgo_cirm43.RG.540.SeuratObject.Rds")
out<-lapply(c("5","4","0"),function(x) da_one_v_one(obj=dat,cluster=x,i="60",j="90",group="DIV"))
out<-do.call("rbind",out)
out<-out[out$p_val_adj<0.1,]
write.table(out,file="orgo_cirm43.RG.540.percluster_DIV.peaks.tsv",col.names=T,row.names=F,sep="\t")
system("slack -F orgo_cirm43.RG.540.percluster_DIV.peaks.tsv ryan_todo")
#out<-lapply(c("5","4","0"),function(x) da_one_v_one(obj=dat,cluster=x,i="60",j="90",group="DIV",assay.="chromvar"))
#out<-do.call("rbind",out)
#out<-out[out$p_val_adj<0.1,]
#write.table(out,file="orgo_cirm43.RG.540.percluster_DIV.chromvar.tsv",col.names=T,row.names=F,sep="\t")
#system("slack -F orgo_cirm43.RG.540.percluster_DIV.chromvar.tsv ryan_todo")
library(JASPAR2020)
library(TFBSTools)
library(universalmotif)
library(Signac)
library(Seurat)
library(GenomicRanges)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
set.seed(1234)
library(BiocParallel)
register(MulticoreParam(5))
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
#download cluster root motifs
system("wget --no-check-certificate https://jaspar2020.genereg.net/static/clustering/2020/vertebrates/CORE/interactive_trees/JASPAR_2020_matrix_clustering_vertebrates_cluster_root_motifs.tf") #use JASPAR2020 motif clusters
tf<-read_transfac("JASPAR_2020_matrix_clustering_vertebrates_cluster_root_motifs.tf") #read in transfac format
#set up PWMatrix-List
pfm<-lapply(tf,function(x) convert_motifs(x,class="TFBSTools-PWMatrix"))
names(pfm)<-lapply(pfm,function(x) x@name)
pfm<-do.call(PWMatrixList,pfm)
jaspar_processing<-function(x){
dat<-readRDS(file=x) ###read Seurat file
#Run regular chromvar
# Scan the DNA sequence of each peak for the presence of each motif, using orgo_atac for all objects (shared peaks)
motif.matrix <- CreateMotifMatrix(features = granges(dat), pwm = pfm, genome = 'hg38', use.counts = FALSE)
# Create a new Mofif object to store the results
motif <- CreateMotifObject(data = motif.matrix, pwm = pfm)
dat[["peaks_2"]]<-dat[["peaks"]] #duplicating peaks assay so as to not overwrite current motifs
DefaultAssay(dat)<-"peaks_2"
# Add the Motif object to the assays and run ChromVar
dat<- SetAssayData(object = dat, assay = 'peaks_2', slot = 'motifs', new.data = motif)
dat<- RegionStats(object = dat, genome = BSgenome.Hsapiens.UCSC.hg38)
dat<- RunChromVAR( object = dat,genome = BSgenome.Hsapiens.UCSC.hg38,new.assay.name="jaspar_tffamily")
saveRDS(dat,file=x)
saveRDS(dat,file=paste0(x,".backup.rds"))
}
jaspar_processing(x="orgo_cirm43.RG.540.SeuratObject.Rds")
#jaspar_processing(x="orgo_cirm43.RG.50.SeuratObject.Rds")
Using slingshot to generate the trajectory and GAMs to fit matrix rows to pseudotime.
#Trying Slingshot instead
#follow this https://kstreet13.github.io/bioc2020trajectories/articles/workshopTrajectories.html
library(slingshot)
library(Seurat)
library(Signac)
library(scales)
library(viridis)
library(Matrix)
library(SingleCellExperiment)
library(patchwork)
library(ggplot2)
library(parallel)
library(JASPAR2020)
library(TFBSTools)
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(reshape2)
library(qvalue)
library(RColorBrewer)
library(zoo)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
slingshot_processing<-function(x,prefix){
atac_sub<-readRDS(x)
DefaultAssay(atac_sub)<-"peaks"
for(i in c("DIV","postqc_clusters","original_clusters")){
plt<-DimPlot(atac_sub,group.by=i)
ggsave(plt,file=paste0(prefix,".",i,"umap.pdf"),width=5,height=5)
system(paste0("slack -F ",paste0(prefix,".",i,"umap.pdf")," ryan_todo"))
}
sce<-SingleCellExperiment(atac_sub[["peaks"]]@data,colData=atac_sub@meta.data)
reducedDims(sce) <- list(UMAP=atac_sub@reductions$umap@cell.embeddings[row.names(atac_sub@meta.data),])
sce <- slingshot(sce, reducedDim="UMAP", clusterLabels = colData(sce)$postqc_clusters, start.clus = "5" ) #use seurat clusters as cluster labels
#reducedDims(sce) <- list(CISTOPIC=atac_sub@reductions$cistopic@cell.embeddings[row.names(atac_sub@meta.data),])
#sce <- slingshot(sce, reducedDim="CISTOPIC", clusterLabels = colData(sce)$postqc_clusters, start.clus = "5" ) #use seurat clusters as cluster labels
colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100) #set up color palette
plotcol <- colors[cut(sce$slingPseudotime_1, breaks=100)]
pdf(file=paste0(prefix,".slingshot.pseudotime.pdf")) #print pseudotime over RG dims plot
plot(reducedDims(sce)$UMAP, col = plotcol, pch=16, asp = 1)
lines(SlingshotDataSet(sce), lwd=1, col='black')
ggplot()+geom_density(aes(x=sce$slingPseudotime_1,group=sce$DIV,fill=as.factor(sce$DIV)),alpha=0.1)+theme_minimal()
dev.off()
system(paste0("slack -F ",prefix,".slingshot.pseudotime.pdf ryan_todo"))
atac_sub$pseudotime<-sce$slingPseudotime_1 #set up pseudotime metadata
#plot metadata variables over pseudotime
plt1<-ggplot()+geom_density(aes(x=atac_sub$pseudotime,group=atac_sub$DIV,fill=as.factor(atac_sub$DIV)),alpha=0.1)+theme_minimal()
plt2<-ggplot()+geom_density(aes(x=atac_sub$pseudotime,group=atac_sub$Phase,fill=as.factor(atac_sub$Phase)),alpha=0.1)+theme_minimal()
plt3<-ggplot()+geom_density(aes(x=atac_sub$pseudotime,group=atac_sub$differentiation_exp,fill=as.factor(atac_sub$differentiation_exp)),alpha=0.1)+theme_minimal()
plt4<-ggplot()+geom_density(aes(x=atac_sub$pseudotime,group=atac_sub$postqc_clusters,fill=as.factor(atac_sub$postqc_clusters)),alpha=0.1)+theme_minimal()
plt5<-FeaturePlot(atac_sub,feature="pseudotime",order=T)
ggsave(plt1/plt2/plt3/plt4/plt5,file=paste0(prefix,".metadatadensity.pseudotime.pdf"))
system(paste0("slack -F ",prefix,".metadatadensity.pseudotime.pdf ryan_todo"))
saveRDS(atac_sub,file=x)
#shared rows only for ga and motif mean
#ga_mean<-ga_mean[row.names(ga_mean) %in% row.names(motif_loess),]
#motif_loess<-motif_loess[row.names(motif_loess) %in% row.names(ga_mean),]
}
slingshot_processing(x="orgo_cirm43.RG.540.SeuratObject.Rds",prefix="orgo_cirm43.RG.540")
#slingshot_processing(x="orgo_cirm43.RG.50.SeuratObject.Rds",prefix="orgo_cirm43.RG.50")
atac_sub<-subset(atac_sub,pseudotime!="NA") #remove NA values
Using Supplementary Table S3 from Bhaduri paper for organoid cluster markers. Uploaded to the server to be processed into an R data set.
rationale<-read.table("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/ref/Bhaduri_S3.organoidmarkers.rationale.tsv.txt",sep="\t",head=T)
colnames(rationale)[1]<-"cluster"
markers<-read.table("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/ref/Bhaduri_S3.organoidmarkers.tsv.txt",sep="\t",head=T)
markers<-markers[markers$adjusted.p_val<=0.05,]
out<-merge(markers,rationale,by="cluster")
saveRDS(out,file="/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/ref/Bhaduri_clusters.rds")
library(slingshot)
library(Seurat)
library(Signac)
library(viridis)
library(Matrix)
library(SingleCellExperiment)
library(patchwork)
library(ggplot2)
library(parallel)
library(JASPAR2020)
library(TFBSTools)
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(reshape2)
library(qvalue)
library(RColorBrewer)
library(zoo)
library(mgcv)
library(scales)
library(ape)
library(ggdendro)
library(dendextend)
library(dendsort)
library(readxl)
library(seriation)
library(universalmotif)
library(Ckmeans.1d.dp)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
########Heatmap shoutouts###########
#markers<-readRDS(file="/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/ref/Bhaduri_clusters.rds")
#system("wget --no-check-certificate https://static-content.springer.com/esm/art%3A10.1038%2Fs41593-020-00794-1/MediaObjects/41593_2020_794_MOESM3_ESM.xlsx")
markers <- read_excel("41593_2020_794_MOESM3_ESM.xlsx", sheet = 9) #Supp Table 8
#system("wget https://jaspar2020.genereg.net/static/clustering/2020/vertebrates/CORE/interactive_trees/JASPAR_2020_matrix_clustering_vertebrates_central_motifs_IDs.tab")
#system("wget https://jaspar2020.genereg.net/static/clustering/2020/vertebrates/CORE/interactive_trees/JASPAR_2020_matrix_clustering_vertebrates_cluster_root_motifs.tf")
tf_fam_names<-read.table("JASPAR_2020_matrix_clustering_vertebrates_central_motifs_IDs.tab")
#from https://broadinstitute.github.io/2019_scWorkshop/functional-pseudotime-analysis.html
# Fit GAM for each gene using pseudotime as independent variable.
gam_per_row<-function(var.,pseudotime.=pseudotime,k_in,bins=100){
out_list<-list()
d <- data.frame(y=var., t=pseudotime.)
tmp <- gam(y ~ s(t,k=k_in), data=d) #k sets knots (kind of), which alters wiggliness of fit line
pd=data.frame(t=seq(from=min(pseudotime.),to=max(pseudotime.),by=(max(pseudotime.)-min(pseudotime.))/bins))
out_list[["out"]]<-predict.gam(tmp,newdata=pd)
out_list[["p"]] <- as.numeric(summary(tmp)$p.pv)
return(out_list)
}
zscore_per_row<-function(var,pseudotime.=pseudotime,bins=100){
out_list<-list()
d <- data.frame(y=scale(var), t=pseudotime.[match(names(pseudotime.),names(var))])
d_window<-rollapply(d[order(d$t),]$y,width = floor(length(d$y)/bins),by = floor(floor(length(d$y)/bins)/5),FUN = mean, align = "left") #width of 1% of windows and step of 20% of width #
return(d_window)
}
pseudotime_gam_fit<-function(
obj=atac_sub,x=chromvar,y=pseudotime,
z=-1,prefix="test",colfun,
filt_to_top_perc=-1,bins=100,filt_markers="NA",cluster=FALSE
){
#z= k knots in fit, -1 using cross validation to automatically determine;
#filt_to_top_perc -1 means no filter, any number [0-1] is top quantile of variance kept (0.9 means var > 90% quantile kept)
if(filt_to_top_perc != -1){
x<-x[which(apply(x,1,var)>quantile(unlist(apply(x,1,var)),filt_to_top_perc)),]
}
if(filt_markers!="NA"){
x<-x[row.names(x) %in% filt_markers,]
}
print("Fitting GAMs")
gam.out<-mclapply(1:nrow(x), function(i) gam_per_row(var.=x[i,],k_in=z,bins=bins),mc.cores=20)
i<-1
gam.pval<-as.data.frame(cbind(gene=row.names(x),pval=sapply(unlist(lapply(gam.out,"[[",2)),as.numeric)))
gam.pval$qval<-p.adjust(p=gam.pval$pval,method="bonferroni")
saveRDS(gam.pval,file=paste0(prefix,".pseudotime.GAM.540.pval.rds"))
print("Generating Fit Curves")
gam.dat<-as.data.frame(do.call("rbind",lapply(gam.out,"[[",1)))
row.names(gam.dat)<-row.names(x)
saveRDS(gam.pval,file=paste0(prefix,".pseudotime.GAM.540.values_unscaled.rds"))
print("Scaling Curves")
gam.dat<-apply(gam.dat,1,rescale)
gam.dat<-t(gam.dat)
gam.dat<-gam.dat[order(unlist(lapply(1:nrow(gam.dat),function(i) which(gam.dat[i,]==max(gam.dat[i,]))))),]
result <- Ckmeans.1d.dp(unlist(lapply(1:nrow(gam.dat),function(i) which(gam.dat[i,]==max(gam.dat[i,])))))
k <- max(result$cluster)
if(endsWith(prefix,"GA") | endsWith(prefix,"chromvar")){
label_idx=match(row.names(gam.dat),markers$gene)
col_markers<-setNames(
nm=c("new1","new2","new3","new4","new5","new6","new7","new8","new9"),
c("#9ed0e8","#aeb3da","#ffe7b3","#fed9b2","#007cbc","#414ca0","#febf0f","#f7921e","#a7d27c")
)
markers$col<-unname(col_markers[match(markers$cluster,names(col_markers) )]) #set up color of text by cell subtype
markers<- as.data.frame(markers %>% filter(p_val_adj<0.05) %>% group_by(cluster) %>% slice_min(p_val_adj,n=10))
label_idx=match(row.names(gam.dat),markers$gene)
label_idx<-label_idx[!is.na(label_idx)]
ha = rowAnnotation(genes = anno_mark(at = which(!is.na(label_idx)),
labels = row.names(gam.dat)[which(!is.na(label_idx))],
labels_gp = gpar(col =markers[label_idx[!is.na(label_idx)],]$col,fontsize=3)
))
print("Making GA or Chromvar Heatmap")
if(!cluster){
plt1<-Heatmap(gam.dat,
row_order=1:nrow(gam.dat),
column_order=1:ncol(gam.dat),
row_names_gp = gpar(fontsize = 3),
column_names_gp = gpar(fontsize = 3),
show_column_names=T,
right_annotation=ha,
row_split=result$cluster,
col=colfun)
} else {
plt1<-Heatmap(motif_sub,
column_order=1:ncol(motif_sub),
cluster_rows = as.dendrogram(o1[[1]]),
row_names_gp = gpar(fontsize = 3),
column_names_gp = gpar(fontsize = 3),
show_column_names=T,
right_annotation=ha,
row_split=result$cluster,
col=colfun,
show_heatmap_legend=T)}
} else if (endsWith(prefix,"family")){
row.names(gam.dat)<-unlist(lapply(row.names(gam.dat),function(i) gsub("-","_",i)))
label_idx=match(row.names(gam.dat),tf_fam_names$V1)
ha = rowAnnotation(genes = anno_mark(at = 1:nrow(gam.dat),
labels = tf_fam_names[label_idx,]$V3,
labels_gp = gpar(fontsize=30)
))
print("Making TF Family Heatmap")
if(!cluster){
plt1<-Heatmap(gam.dat,
row_order=1:nrow(gam.dat),
column_order=1:ncol(gam.dat),
row_names_gp = gpar(fontsize = 3),
column_names_gp = gpar(fontsize = 3),
show_column_names=T,
right_annotation=ha,
row_split=result$cluster,
col=colfun)
} else {
plt1<-Heatmap(motif_sub,
column_order=1:ncol(motif_sub),
cluster_rows = as.dendrogram(o1[[1]]),
row_names_gp = gpar(fontsize = 3),
column_names_gp = gpar(fontsize = 3),
show_column_names=T,
right_annotation=ha,
row_split=result$cluster,
col=colfun,
show_heatmap_legend=T)
}
} else {
print("Making Other Heatmap")
if(!cluster){
plt1<-Heatmap(gam.dat,
row_order=1:nrow(gam.dat),
column_order=1:ncol(gam.dat),
row_names_gp = gpar(fontsize = 3),
column_names_gp = gpar(fontsize = 3),
show_column_names=T,
right_annotation=ha,
row_split=result$cluster,
col=colfun)
} else {
plt2<-Heatmap(motif_sub,
column_order=1:ncol(motif_sub),
#row_order=1:nrow(motif_sub),
row_split=TRUE,
cluster_rows=hc_motif,
row_names_gp = gpar(fontsize = 3),
column_names_gp = gpar(fontsize = 3),
row_split=result$cluster,
show_column_names=T,
right_annotation=ha,
col=colfun,
show_heatmap_legend=T)
}
}
pdf(paste0(prefix,".pseudotime.heatmap.pdf"),width=50,height=50)
plt<-draw(plt1)#,annotation_legend_list=lgd_list)
dev.off()
system(paste0("slack -F ",prefix,".pseudotime.heatmap.pdf"," ryan_todo"))
if (endsWith(prefix,"family")){
library("ggseqlogo")
tf<-read_transfac("JASPAR_2020_matrix_clustering_vertebrates_cluster_root_motifs.tf") #read in transfac format
pfm<-lapply(tf,function(x) convert_motifs(x,class="TFBSTools-PFMatrix"))
names(pfm)<-lapply(pfm,function(x) x@name)
plt_motif<-lapply(row.names(gam.dat), function(j){
ggplot() + geom_logo( data=pfm[j][[1]]@profileMatrix, seq_type="dna")+theme_void()
})
plt_motif<-wrap_plots(plt_motif,ncol=1)
ggsave(plt_motif,file=paste0(prefix,".pseudotime.chromvar.heatmap.tffamilitymotifs.pdf"),height=30,width=2,limitsize=F)
system(paste0("slack -F ",prefix,".pseudotime.chromvar.heatmap.tffamilitymotifs.pdf"," ryan_todo"))
}
return(gam.dat)
}
pseudotime_processing<-function(x,prefix){
atac_sub<-readRDS(x)
atac_sub<-subset(atac_sub,pseudotime!="NA") #remove NA values
#Test if things vary by pseudotime, set up matrices to test
geneactivity=as.matrix(atac_sub@assays$GeneActivity@data) # gene activity
cistopics=t(atac_sub@reductions$cistopic@cell.embeddings) # cistopic
pseudotime<-atac_sub$pseudotime
#chromvar=as.matrix(atac_sub@assays$chromvar@data) # chromvar
jaspar_tffamily=as.matrix(atac_sub@assays$jaspar_tffamily@data) #pwm families
#tfList <- getMatrixByID(JASPAR2020, ID=row.names(chromvar)) #Assign human readable TF motif names
#tfList <-unlist(lapply(names(tfList), function(x) name(tfList[[x]])))
#row.names(chromvar)<-tfList
#Ensure cell ordering is the same
#chromvar<-chromvar[,match(colnames(chromvar), names(pseudotime))]
jaspar_tffamily<-jaspar_tffamily[,match(colnames(jaspar_tffamily), names(pseudotime))]
cistopics<-cistopics[,match(colnames(cistopics),names(pseudotime))]
geneactivity<-geneactivity[,match(colnames(geneactivity),names(pseudotime))]
cividis_col<-colorRamp2(c(0, 0.5, 1), cividis(3))
jaspar_tf_gam<-pseudotime_gam_fit(x=jaspar_tffamily,y=pseudotime,z=-1,prefix=paste0(prefix,".jaspar_tffamily"),colfun=cividis_col,filt_to_top_perc=0.7,bins=100)
saveRDS(jaspar_tf_gam,file=paste0(prefix,".jaspar_tffamily",".gam_dat.rds"))
#cividis_col<-colorRamp2(c(0, 0.5, 1), cividis(3))
#chromvar_tf_gam<-pseudotime_gam_fit(x=chromvar,y=pseudotime,z=-1,prefix=paste0(prefix,".chromvar"),colfun=cividis_col,filt_to_top_perc=0.80,bins=100)
cistopic_col<-colorRamp2(c(0, 0.5, 1), rev(c("#004529","#78c679","#f7f7f7")))
cistopic_gam<-pseudotime_gam_fit(x=cistopics,y=pseudotime,z=-1,prefix=paste0(prefix,".cistopic"),colfun=cistopic_col,bins=100)
saveRDS(cistopic_gam,file=paste0(prefix,".cistopic",".gam_dat.rds"))
magma_col<-colorRamp2(c(0,0.7, 1), magma(3))
#ga_gam<-pseudotime_gam_fit(x=geneactivity,y=pseudotime,z=-1,prefix=paste0(prefix,".GA"),colfun=magma_col,filt_to_top_perc=0.99,bins=100)
ga_gam<-pseudotime_gam_fit(x=geneactivity,y=pseudotime,z=-1,prefix=paste0(prefix,".GA"),colfun=magma_col,filt_to_top_perc=-1,bins=100,filt_markers=markers$gene)
saveRDS(ga_gam,file=paste0(prefix,".GA",".gam_dat.rds"))
dat<-cor(t(ga_gam),t(jaspar_tf_gam))
plt<-Heatmap(dat,
row_names_gp = gpar(fontsize = 10),
column_names_gp = gpar(fontsize = 10),
show_column_names=T,
)
pdf(paste0(prefix,".pseudotime.cor.heatmap.pdf"),width=50,height=50)
plt<-draw(plt)#,annotation_legend_list=lgd_list)
dev.off()
system(paste0("slack -F ",prefix,".pseudotime.cor.540.heatmap.pdf"," ryan_todo"))
}
pseudotime_processing(x="orgo_cirm43.RG.540.SeuratObject.Rds",prefix="orgo_cirm43.RG.540")
#pseudotime_processing(x="orgo_cirm43.RG.50.SeuratObject.Rds",prefix="orgo_cirm43.RG.50")
Plotting single gene/TF family trajectories
library(patchwork)
library(ggplot2)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
prefix="orgo_cirm43.RG.540"
ga_gam<-readRDS(file=paste0(prefix,".GA",".gam_dat.rds"))
jaspar_tf_gam<-readRDS(file=paste0(prefix,".jaspar_tffamily",".gam_dat.rds"))
plot_through_time<-function(ga=ga_gam,tf=jaspar_tf_gam,gene="LHX6",tf_fam=)
Use chromVAR motifmatchr to check for peak overlap with topic bed files and motifs
library(Seurat)
library(Signac)
library(ggplot2)
set.seed(1234)
library(patchwork)
library(JASPAR2020)
library(motifmatchr)
library(chromVAR)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
atac_sub<-readRDS("orgo_cirm43.RG_subset.pseudotime.SeuratObject.Rds")
cisTopicObject<-readRDS(file="orgo_cirm43.RG.CisTopicObject.Rds")
# Get a list of motif position frequency matrices from the JASPAR database
pfm <- getMatrixSet(x = JASPAR2020, opts = list(species =9606, all_versions = FALSE))
# Scan the DNA sequence of each peak for the presence of each motif, using orgo_atac for all objects (shared peaks)
motif.matrix <- CreateMotifMatrix(features = granges(orgo_cirm43), pwm = pfm, genome = 'hg38', use.counts = FALSE)
# Create a new Mofif object to store the results
motif <- CreateMotifObject(data = motif.matrix, pwm = pfm)
motifs <- getJasparMotifs()
motif_ix <- matchMotifs(motifs, counts_filtered,
genome = BSgenome.Hsapiens.UCSC.hg19)
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
set.seed(1234)
library(EnsDb.Hsapiens.v86)
library(Matrix)
library(dplyr)
library(patchwork)
setwd("/home/groups/oroaklab/adey_lab/projects/BRAINS_Oroak_Collab/organoid_finalanalysis")
all_cells<-readRDS("orgo_cirm43.preQC2.SeuratObject.Rds")
processed_cells<-readRDS("orgo_cirm43.QC2.SeuratObject.Rds")
processed_metadata<-as.data.frame(processed_cells@meta.data[colnames(processed_cells@meta.data)[!(colnames(processed_cells@meta.data) %in% colnames(all_cells@meta.data))]])
all_cells<-AddMetaData(all_cells,processed_metadata)
write.table(as.data.frame(all_cells@meta.data),col.names=T,row.names=T,sep="\t",quote=F,file="organoid_ST5.metadata.tsv")
system("slack -F organoid_ST5.metadata.tsv ryan_todo")
dat<-as.data.frame(all_cells@meta.data)
dat %>% filter(pass_qc=="True") %>% summarize(frip_avg=mean(FRIP),frip_sd=sd(FRIP))