PATH
# Download the GCEN 0.6.3 Linux binaries into ~/software and unpack them.
mkdir -p ~/software/
cd ~/software/
wget https://www.biochen.org/gcen/static/0.6.3/gcen-0.6.3-linux-x86_64.tar.gz
tar zxf gcen-0.6.3-linux-x86_64.tar.gz
# Append the bin/ and util/ directories to PATH for future shells.
# $HOME is expanded now; \$PATH is written literally so it is expanded when ~/.bashrc runs.
echo "export PATH=$HOME/software/gcen-0.6.3-linux-x86_64/bin:\$PATH" >> ~/.bashrc
echo "export PATH=$HOME/software/gcen-0.6.3-linux-x86_64/util:\$PATH" >> ~/.bashrc
# Pick up the new PATH in the current shell, then remove the downloaded archive.
source ~/.bashrc
rm ~/software/gcen-0.6.3-linux-x86_64.tar.gz
The executable programs are located in the `bin` directory and the sample data is located in the `sample_data` directory. All of the following commands can be run with the sample data. We first switch the current directory to the `bin` directory.
# Work from the bin directory: the commands that follow are invoked as ./<program>
# and refer to the sample data as ../sample_data/.
cd ~/software/gcen-0.6.3-linux-x86_64/bin/
Step 1: data pretreatment
# Normalize the raw expression matrix (-m selects the method; here upqt).
./data_norm -i ../sample_data/gene_expr.tsv -o ../sample_data/gene_expr_norm.tsv -m upqt
# Filter the normalized matrix; the filtered file is the input of the network step.
./data_filter -i ../sample_data/gene_expr_norm.tsv -o ../sample_data/gene_expr_norm_filter.tsv -p 0.75
Step 2: co-expression network construction
# Build the co-expression network from the normalized, filtered matrix and
# write it to ../sample_data/gene_co_expr.network.
./network_build -i ../sample_data/gene_expr_norm_filter.tsv -o ../sample_data/gene_co_expr.network -m spearman -p 0.001 -c 0.8 -f -t 6
Step 3: module identification (optional)
# Identify modules in gene_co_expr.network and write them to module.txt,
# which the module-based annotation commands read via -m.
./module_identify -i ../sample_data/gene_co_expr.network -o ../sample_data/module.txt -s 0.5 -t 6
Step 4: function annotation
network based annotation
# GO annotation of the network (-g: GO ontology file, -a: gene-to-GO associations).
./annotate -g ../sample_data/go-basic.obo -a ../sample_data/gene_go.assoc -n ../sample_data/gene_co_expr.network -o ../sample_data/network_go_annotation
# KEGG annotation of the same network (-k: K2ko.tsv, -a: gene-to-KEGG associations).
./annotate -k ../sample_data/K2ko.tsv -a ../sample_data/gene_kegg.assoc -n ../sample_data/gene_co_expr.network -o ../sample_data/network_kegg_annotation
module based annotation (optional)
# GO annotation of the modules in module.txt (-m replaces the -n network input).
./annotate -g ../sample_data/go-basic.obo -a ../sample_data/gene_go.assoc -m ../sample_data/module.txt -o ../sample_data/module_go_annotation
# KEGG annotation of the same modules.
./annotate -k ../sample_data/K2ko.tsv -a ../sample_data/gene_kegg.assoc -m ../sample_data/module.txt -o ../sample_data/module_kegg_annotation
identify genes with specific functions based on RWR (optional)
# Run rwr on the network with the genes of interest listed in rwr_interested_gene.list.
# Invoked as ./rwr, like the other programs run from the bin directory, so it
# works even when bin/ has not been added to PATH.
./rwr -n ../sample_data/gene_co_expr.network -g ../sample_data/rwr_interested_gene.list -o ../sample_data/rwr_result.tsv
# Install GCEN: download the 0.6.3 Linux binaries into ~/software and unpack them.
mkdir -p ~/software/
cd ~/software/
wget https://www.biochen.org/gcen/static/0.6.3/gcen-0.6.3-linux-x86_64.tar.gz
tar zxf gcen-0.6.3-linux-x86_64.tar.gz
# Append the bin/ and util/ directories to PATH for future shells.
# $HOME is expanded now; \$PATH is written literally so it is expanded when ~/.bashrc runs.
echo "export PATH=$HOME/software/gcen-0.6.3-linux-x86_64/bin:\$PATH" >> ~/.bashrc
echo "export PATH=$HOME/software/gcen-0.6.3-linux-x86_64/util:\$PATH" >> ~/.bashrc
# Pick up the new PATH in the current shell, then remove the downloaded archive.
source ~/.bashrc
rm ~/software/gcen-0.6.3-linux-x86_64.tar.gz
2. Install SRA Toolkit / STAR / RSEM
cd ~/software/

# SRA Toolkit: unpack the prebuilt release and add its bin/ to PATH.
wget https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.11.3/sratoolkit.2.11.3-ubuntu64.tar.gz
tar zxf sratoolkit.2.11.3-ubuntu64.tar.gz
echo "export PATH=$HOME/software/sratoolkit.2.11.3-ubuntu64/bin:\$PATH" >> ~/.bashrc
source ~/.bashrc
rm ~/software/sratoolkit.2.11.3-ubuntu64.tar.gz

# STAR: unpack the release archive and add bin/Linux_x86_64_static to PATH.
wget https://github.com/alexdobin/STAR/archive/refs/tags/2.7.9a.tar.gz
tar zxf 2.7.9a.tar.gz
echo "export PATH=$HOME/software/STAR-2.7.9a/bin/Linux_x86_64_static:\$PATH" >> ~/.bashrc
source ~/.bashrc
rm ~/software/2.7.9a.tar.gz

# RSEM: build from source in the home directory, then install under ~/software.
cd ~/
wget https://github.com/deweylab/RSEM/archive/v1.3.3.tar.gz
tar zxf v1.3.3.tar.gz
cd RSEM-1.3.3/
# DESTDIR and prefix together give the install location $HOME/software/RSEM-1.3.3,
# which is the directory whose bin/ is added to PATH on the next line.
make install DESTDIR="$HOME" prefix=/software/RSEM-1.3.3
echo "export PATH=$HOME/software/RSEM-1.3.3/bin:\$PATH" >> ~/.bashrc
source ~/.bashrc
# Remove the build tree and the downloaded archive.
rm -rf ~/RSEM-1.3.3
rm ~/v1.3.3.tar.gz
3. Download RNA-Seq data from NCBI SRA database
Download the data to the `/home/chenwen/SRP009426/sra/` directory. Run `ls` to display the files.
(base) chenwen@workstation:~/SRP009426/sra$ ls
SRR372787.sra SRR372788.sra SRR372789.sra SRR372790.sra SRR372791.sra SRR372792.sra SRR372793.sra SRR372794.sra SRR372795.sra SRR372796.sra SRR372797.sra SRR372798.sra SRR372799.sra SRR372800.sra SRR372801.sra SRR372802.sra SRR372803.sra
4. Convert SRA files to FASTQ files
cd /home/chenwen/SRP009426/
# -p keeps the step re-runnable when fastq/ already exists.
mkdir -p fastq
# Convert every .sra file to gzipped FASTQ files in fastq/.
# The *_1/*_2 files written by --split-files are what the quantification step reads.
for i in sra/*.sra; do
  fastq-dump --split-files --origfmt --gzip --outdir fastq "$i"
done
# Download the Danio rerio GRCz11 genome FASTA and GTF annotation
# (Ensembl release 101) into ref/ and decompress them.
cd /home/chenwen/SRP009426/
mkdir -p ref
wget -P ref ftp://ftp.ensembl.org/pub/release-101/fasta/danio_rerio/dna/Danio_rerio.GRCz11.dna.primary_assembly.fa.gz
wget -P ref ftp://ftp.ensembl.org/pub/release-101/gtf/danio_rerio/Danio_rerio.GRCz11.101.chr.gtf.gz
gunzip ref/*.gz
# Build the RSEM reference (with a STAR index, via --star) from the genome FASTA
# and GTF annotation; the reference is written with the prefix index/GRCz11.
cd /home/chenwen/SRP009426/
mkdir -p index
rsem-prepare-reference \
  --gtf ref/Danio_rerio.GRCz11.101.chr.gtf \
  --star \
  --star-sjdboverhang 76 \
  ref/Danio_rerio.GRCz11.dna.primary_assembly.fa \
  index/GRCz11 \
  -p 8
# Quantify expression for every sample with RSEM (STAR aligner), reading the
# paired gzipped FASTQ files from fastq/ and the reference at index/GRCz11.
cd /home/chenwen/SRP009426/
mkdir -p rsem
for i in sra/*.sra; do
  # Sample name = .sra file name without directory and extension (e.g. SRR372787).
  prefix=$(basename "$i" .sra)
  rsem-calculate-expression \
    --paired-end \
    --star \
    --star-gzipped-read-file \
    --estimate-rspd \
    --no-bam-output \
    -p 8 \
    "fastq/${prefix}_1.fastq.gz" \
    "fastq/${prefix}_2.fastq.gz" \
    index/GRCz11 \
    "rsem/${prefix}"
done
8. Get gene expression matrix for GCEN analysis
Write the sample information to the rsem_sample.txt file, and then run the following command:
SRR372787 /home/chenwen/SRP009426/rsem/SRR372787.genes.results
SRR372788 /home/chenwen/SRP009426/rsem/SRR372788.genes.results
SRR372789 /home/chenwen/SRP009426/rsem/SRR372789.genes.results
SRR372790 /home/chenwen/SRP009426/rsem/SRR372790.genes.results
SRR372791 /home/chenwen/SRP009426/rsem/SRR372791.genes.results
SRR372792 /home/chenwen/SRP009426/rsem/SRR372792.genes.results
SRR372793 /home/chenwen/SRP009426/rsem/SRR372793.genes.results
SRR372794 /home/chenwen/SRP009426/rsem/SRR372794.genes.results
SRR372795 /home/chenwen/SRP009426/rsem/SRR372795.genes.results
SRR372796 /home/chenwen/SRP009426/rsem/SRR372796.genes.results
SRR372797 /home/chenwen/SRP009426/rsem/SRR372797.genes.results
SRR372798 /home/chenwen/SRP009426/rsem/SRR372798.genes.results
SRR372799 /home/chenwen/SRP009426/rsem/SRR372799.genes.results
SRR372800 /home/chenwen/SRP009426/rsem/SRR372800.genes.results
SRR372801 /home/chenwen/SRP009426/rsem/SRR372801.genes.results
SRR372802 /home/chenwen/SRP009426/rsem/SRR372802.genes.results
SRR372803 /home/chenwen/SRP009426/rsem/SRR372803.genes.results
# Combine the per-sample RSEM result files listed in rsem_sample.txt into a
# single expression matrix, gene_expr.tsv.
# NOTE(review): --tpm presumably selects the TPM column of the RSEM results — confirm.
generate_expr_matrix_from_rsem -i rsem_sample.txt -o gene_expr.tsv --tpm
9. Data normalization and filter
# Normalize the expression matrix (-m selects the method; here tmm).
data_norm -i gene_expr.tsv -o gene_expr_norm.tsv -m tmm
# Filter the normalized matrix.
data_filter -i gene_expr_norm.tsv -o gene_expr_norm_filter.tsv -p 0.75
# Build the co-expression network from the filtered matrix; the resulting
# gene_co_expr.network is the -n input of the annotate commands.
network_build -i gene_expr_norm_filter.tsv -o gene_co_expr.network -m spearman -p 0.001 -c 0.8 -f -t 8
go-basic.obo and zebrafish_gene_go.tsv can be downloaded from our website.
# GO annotation of the co-expression network, using go-basic.obo and the
# zebrafish gene-to-GO association file.
annotate -g go-basic.obo -a zebrafish_gene_go.tsv -n gene_co_expr.network -o network_go_annotation -t 8
K2ko.tsv and zebrafish_gene_kegg.tsv can be downloaded from our website.
# KEGG annotation of the co-expression network, using K2ko.tsv and the
# zebrafish gene-to-KEGG association file.
annotate -k K2ko.tsv -a zebrafish_gene_kegg.tsv -n gene_co_expr.network -o network_kegg_annotation -t 8