huggingface-cli 原生命令

  • 挂载远程目录

# Mount the remote NFS export on /mnt/nas.
# Create the mount point first: mounting fails when the target directory is missing.
sudo mkdir -p /mnt/nas
# Go through mount(8) instead of calling the mount.nfs helper directly.
sudo mount -t nfs 172.17.120.251:/volume1/Public /mnt/nas
  • 安装环境依赖

# Compiler toolchain and headers needed to build CPython from source.
sudo yum groupinstall "Development Tools" -y
sudo yum install openssl-devel libffi-devel bzip2-devel -y

# Fetch and unpack the Python 3.9.16 sources.
wget https://www.python.org/ftp/python/3.9.16/Python-3.9.16.tgz
tar xf Python-3.9.16.tgz

# Build and install; the steps are chained so a failed `cd` or `configure`
# does not run the following step in the wrong state.
# `altinstall` installs versioned executables (python3.9, pip3.9) only.
cd Python-3.9.16 \
  && ./configure --enable-optimizations \
  && sudo make altinstall

# Per the original note: huggingface-hub stops supporting resumable downloads
# from version 0.23 on, so a specific earlier version is installed.
# pip3.9 is used because that is the pip belonging to the Python built above.
pip3.9 install -U huggingface-hub==0.22.2
# NOTE(review): presumably pinned because urllib3 2.x requires a newer OpenSSL
# than the distribution ships -- confirm before changing.
pip3.9 install urllib3==1.26.6
  • huggingface-cli下载命令

# Send Hugging Face requests to the hf-mirror.com endpoint.
export HF_ENDPOINT=https://hf-mirror.com

# Download the bigcode/the-stack-dedup dataset into ./bigcode_the-stack-dedup.
# The token below is a placeholder; substitute a real access token.
huggingface-cli download \
    --token hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxx \
    --resume-download \
    --repo-type dataset \
    --local-dir bigcode_the-stack-dedup \
    --cache-dir ./.cache/huggingface/ \
    --local-dir-use-symlinks True \
    bigcode/the-stack-dedup

# Note: --local-dir-use-symlinks controls whether files in the local dir are
# symlinked to the cache; per the original note, resuming stops working when
# it is set to False.

supervisor进程守护

  • 安装supervisor

# Install supervisor with the Python 3.9 pip.
pip3.9 install supervisor

# Create the config directory together with conf.d: supervisord.conf includes
# /etc/supervisor/conf.d/*.conf and the program definition is written there.
# -p also makes the command safe to re-run.
mkdir -p /etc/supervisor/conf.d

# Generate the sample configuration as the starting point for editing.
echo_supervisord_conf > /etc/supervisor/supervisord.conf
  • supervisor配置文件

vim /etc/supervisor/supervisord.conf
​
; Main supervisord configuration: control sockets, daemon settings and the
; include of per-program files from /etc/supervisor/conf.d.
[unix_http_server]
file=/tmp/supervisor.sock   ; the path to the socket file
[inet_http_server]         ; inet (TCP) server disabled by default
port=0.0.0.0:9001        ; ip_address:port specifier, *:port for all iface
; The web console listens on every interface and supervisord runs as root,
; so require HTTP basic auth. Both values are placeholders: replace them.
username=admin
password=CHANGE_ME
[supervisord]
logfile=/tmp/supervisord.log ; main log file; default $CWD/supervisord.log
logfile_maxbytes=50MB        ; max main logfile bytes b4 rotation; default 50MB
logfile_backups=10           ; # of main logfile backups; 0 means none, default 10
loglevel=info                ; log level; default info; others: debug,warn,trace
pidfile=/tmp/supervisord.pid ; supervisord pidfile; default supervisord.pid
nodaemon=false               ; start in foreground if true; default false
user=root
silent=false                 ; no logs to stdout if true; default false
minfds=1024                  ; min. avail startup file descriptors; default 1024
minprocs=200                 ; min. avail process descriptors;default 200
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
serverurl=unix:///tmp/supervisor.sock ; use a unix:// URL  for a unix socket
[include]
files = /etc/supervisor/conf.d/*.conf
  • huggingface实例配置

vim /etc/supervisor/conf.d/huggingface.conf
​
; Runs the Hugging Face download script under supervisord and restarts it
; when it exits with an unexpected status.
[program:huggingface_cli]
; The script carries a bash shebang, so run it with bash explicitly.
command=/bin/bash /etc/supervisor/huggingface_cli.sh
priority=999                ; the relative start priority (default 999)
autostart=true              ; start at supervisord start (default: true)
; "unexpected" restarts only on exit codes not listed in exitcodes, so a
; download that finishes successfully is not launched again.
autorestart=unexpected
startsecs=10                ; number of secs prog must stay running (def. 10)
startretries=3              ; max # of serial start failures (default 3)
exitcodes=0,2               ; 'expected' exit codes for process
stopsignal=TERM             ; signal used to kill process (default TERM)
stopwaitsecs=10             ; max num secs to wait before SIGKILL (default 10)
user=root                 ; setuid to this UNIX account to run the program
; stdout capture (the stdout_* option names are the ones read for [program:x])
stdout_logfile=/var/log/huggingface_cli_log.log
stdout_logfile_maxbytes=1MB   ; max stdout logfile bytes before rotation (default 50MB)
stdout_logfile_backups=10     ; number of rotated stdout logfiles to keep (default 10)
; stderr capture
stderr_logfile=/var/log/huggingface_cli.log
stderr_logfile_maxbytes=2MB   ; max stderr logfile bytes before rotation (default 50MB)
stderr_logfile_backups=20     ; number of rotated stderr logfiles to keep
​
  • 供supervisor调用的下载脚本

vim /etc/supervisor/huggingface_cli.sh
​
#!/bin/bash
#
# Downloads one Hugging Face repository into a directory on the NAS mount.
# Referenced by the supervisor program definition, which runs this file.
#
# Edit the variables below to choose the repository and target paths.

# Stop on the first failing command and on use of an unset variable.
set -eu

export HF_ENDPOINT="https://hf-mirror.com"

TOKEN="hf_xxxxxxxxxxxxxxxxxxxxx"   # placeholder: replace with a real access token
REPO_TYPE="dataset"
REPO_NAME="liwu/MNBVC"
LOCAL_DIR="/mnt/nas/Yuliao/Downloaded-EN/MNBVC/"
CACHE_DIR="/mnt/nas/Yuliao/Downloaded-EN/.cache/huggingface"

# exec replaces this shell with the downloader, so signals sent to the
# process that was started reach huggingface-cli itself, not a wrapper shell.
# All expansions are quoted so paths containing spaces stay single arguments.
# The last argument has no trailing backslash: nothing may follow the command.
exec huggingface-cli download \
    "$REPO_NAME" \
    --token "$TOKEN" \
    --resume-download \
    --local-dir-use-symlinks True \
    --repo-type "$REPO_TYPE" \
    --local-dir "$LOCAL_DIR" \
    --cache-dir "$CACHE_DIR"
  • web控制台:浏览器访问 http://<服务器IP>:9001(即 supervisord.conf 中 [inet_http_server] 配置的端口)

附:常规wget下载

# Resume partial transfers and recursively fetch every file exposed by an
# nginx autoindex file server, skipping the generated index pages.
wget --continue -r -np -nH -R "index.html*" http://ipaddress:8099/