数据下载管理¶
服务部署¶
主要分为软件配置、代码部署两部分
部署方式一:手动安装¶
需要python >= 3.8.15,如果版本过低,建议使用micromamba创建虚拟环境
#代码部署
cd ~/02Software
git clone http://git.iomics.pro/yhfu/IData_alpha.git
cd IData_alpha  #进入克隆下来的仓库目录(假定 requirements.txt 位于仓库根目录,请按实际路径确认)
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
#软件部署
#aspera
mkdir ~/02Software/aspera
cd ~/02Software/aspera
wget https://ak-delivery04-mul.dhe.ibm.com/sar/CMA/OSA/0adrj/0/ibm-aspera-connect_4.1.3.93_linux.tar.gz
tar zxvf ibm-aspera-connect_4.1.3.93_linux.tar.gz
bash ibm-aspera-connect_4.1.3.93_linux.sh
#加入环境变量
echo 'export PATH="$HOME/.aspera/connect/bin:$PATH"' >> ~/.bashrc && source ~/.bashrc
#储存在node manager中
$HOME/.aspera/connect/etc/asperaweb_id_dsa.openssh
#sratoolkit
mkdir ~/02Software/sratoolkit
cd ~/02Software/sratoolkit
wget --output-document sratoolkit.tar.gz https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/current/sratoolkit.current-centos_linux64.tar.gz
tar -vxzf sratoolkit.tar.gz
cd ./sratoolkit.*-centos_linux64/bin  #下载的是 current 版本,解压目录的版本号会随发布变化(撰写时为 3.1.1)
echo "export PATH=\"$(pwd):\$PATH\"" >> ~/.bashrc && source ~/.bashrc
#edgeturbo
mkdir ~/02Software/edgeturbo
cd ~/02Software/edgeturbo
wget https://ngdc.cncb.ac.cn/ettrans/download/edgeturbo-client.linux.latest.cncb.tar.gz
tar -zxvf edgeturbo-client.linux.latest.cncb.tar.gz #解压即安装
echo "export PATH=\"$(pwd)/edgeturbo-client:\$PATH\"" >> ~/.bashrc && source ~/.bashrc #添加环境变量
#节点免密登录设置
cd ~/.ssh
ssh-keygen
(enter)
(enter)
less id_rsa.pub  (复制其中的全部内容,即公钥)
将生成的公钥添加到待连接服务器的 ~/.ssh/authorized_keys 文件里:
(如果authorized_keys 文件不存在,手动创建:cat id_rsa.pub >> authorized_keys)
vim authorized_keys
修改文件权限
chmod 700 ~/.ssh
chmod 600 authorized_keys
部署方式二: Docker镜像部署
#安装镜像 (当前镜像不完整)
docker load -i downloader_v0.9.tar
#mac
docker run -it --name dl1 -v /public/idata/tmp/download/mac1_mu01:/dl -w /download/IData_alpha/idata_alpha downloader:v0.9 /bin/bash
python3 run_worker.py -n mac1_mu01
#dl0
docker run -it --name dl1 -v /public/idata/tmp/download/dl0_mu01:/dl -w /download/IData_alpha/idata_alpha downloader:v0.9 /bin/bash
python3 run_worker.py -n dl0_mu01
#dl1
docker run -it --name dl1 -v /public/idata/tmp/download/dl1_mu01:/dl -w /download/IData_alpha/idata_alpha downloader:v0.9 /bin/bash
python3 run_worker.py -n dl1_mu01
#dl2
docker run -it --name dl1 -v /public/idata/tmp/download/dl2_mu01:/dl -w /download/IData_alpha/idata_alpha downloader:v0.9 /bin/bash
python3 run_worker.py -n dl2_mu01
#dl3
docker run -it --name dl1 -v /public/idata/tmp/download/dl3_mu01:/dl -w /download/IData_alpha/idata_alpha downloader:v0.9 /bin/bash
python3 run_worker.py -n dl3_mu01
#dl4
docker run -it --name dl1 -v /public/idata/tmp/download/dl4_mu01:/dl -w /download/IData_alpha/idata_alpha downloader:v0.9 /bin/bash
python3 run_worker.py -n dl4_mu01
#dl5
docker run -it --name dl1 -v /public/idata/tmp/download/dl5_mu01:/dl -w /download/IData_alpha/idata_alpha downloader:v0.9 /bin/bash
python3 run_worker.py -n dl5_mu01
#pc9
docker run -it --name dl1 -v /public/idata/tmp/download/pc9_mu01:/dl -w /download/IData_alpha/idata_alpha downloader:v0.9 /bin/bash
python3 run_worker.py -n pc9_mu01
#pc3
docker run -it --name dl1 -v /public/idata/tmp/download/pc3_mu01:/dl -w /download/IData_alpha/idata_alpha downloader:v0.9 /bin/bash
python3 run_worker.py -n pc3_mu01
#pc1
docker run -it --name dl1 -v /public/idata/tmp/download/pc1_mu01:/dl -w /download/IData_alpha/idata_alpha downloader:v0.9 /bin/bash
python3 run_worker.py -n pc1_mu01
提交任务¶
step1: 先获取需要下载的run list
具体参数见--help
python3 ./download/get_download_run.py -s 'Sus scrofa' -m 'WGS' -o ./need_download_run.txt
-s:物种名
-m:组学名称
Note
默认情况下会对数据进行过滤(WGS/Hi-C 要求深度大于 10×,其他组学要求数据量大于 6 million);如需修改过滤阈值,可设置 -d/-r 参数。
step2: 启动下载任务
具体参数见--help
python3 ./download/download.py -i ./need_download_run.txt -n breed_mu01
-n:开始下载的节点名称
Note
支持的节点写在了./conf/config.py里面
任务追踪¶
数据归档¶
详情见原始文件归档
本文阅读量 次本站总访问量 次
Authors: