php连接sftp下载超大目录文件

作者: 白云飞 分类: php,swoole 发布时间: 2019-12-31 15:11 阅读:

最近公司有一个业务:需要将存量的合同文件(pdf 格式)从 sftp 上下载下来,根据文件名(文件名是我们的一个特殊业务字段,这里就不细说了)更新业务库,并将其上传到 oss 上备份,方便日后按用户查看合同。

首先要解决的问题是:怎么通过 php 连接 sftp。这需要 php 安装 ssh2 扩展,这个很好解决,百度就可以查到。下面先展示我连接 sftp 的代码:

<?php
namespace app;

/**
 * Downloads contract files from an SFTP server, backs each one up to OSS and
 * records the uploaded file name in the business database.
 *
 * Relies on the ssh2 extension (ssh2_connect / ssh2_auth_password / ssh2_sftp
 * and the ssh2.sftp:// stream wrapper) and on Swoole's go() coroutine helper.
 */
class Sftp
{
    /** @var string SFTP host. */
    private $host = '';

    /** @var int SFTP port. */
    private $port = 22;

    /** @var string Login user. */
    private $user = '';

    /** @var string Login password. */
    private $password = '';

    /** @var string Remote directory to scan. */
    private $remotePath = '';

    /** @var string Local directory used as a temporary download area. */
    private $localPath = './data';

    /** @var resource|null SSH session returned by ssh2_connect(). */
    private $connection = null;

    /** @var resource|null SFTP subsystem returned by ssh2_sftp(). */
    private $sftp = null;

    /** @var object|null OSS client; created in downLoadAndUpToOSS(). */
    private $ossClient = null;

    /** @var object|null Database client; only its query($sql) method is used. */
    private $mysql = null;

    /**
     * @param array       $config Must contain an 'sftp' array with host, user and
     *                            password; port, remotePath and localPath fall
     *                            back to the property defaults when absent.
     * @param object|null $mysql  Database client exposing query($sql). Optional
     *                            for backward compatibility, but required before
     *                            downLoadAndUpToOSS() is called.
     */
    public function __construct($config, $mysql = null)
    {
        $sftpConfig = $config['sftp'];

        $this->host = $sftpConfig['host'];
        $this->port = $sftpConfig['port'] ?? $this->port;
        $this->user = $sftpConfig['user'];
        $this->password = $sftpConfig['password'];
        $this->remotePath = $sftpConfig['remotePath'] ?? $this->remotePath;
        $this->localPath = $sftpConfig['localPath'] ?? $this->localPath;
        $this->mysql = $mysql;
    }

    /**
     * Opens the SSH session, authenticates and starts the SFTP subsystem.
     *
     * @return int 0 on success, -1 if the SFTP subsystem could not be started,
     *             -2 if authentication failed, -3 if the TCP/SSH connection failed.
     */
    private function connect()
    {
        $connection = ssh2_connect($this->host, $this->port);
        if ($connection === false) {
            // Without this guard ssh2_auth_password() would be handed `false`.
            return -3;
        }
        $this->connection = $connection;

        if (!ssh2_auth_password($this->connection, $this->user, $this->password)) {
            return -2;
        }

        $sftp = ssh2_sftp($this->connection);
        if (!$sftp) {
            return -1;
        }
        $this->sftp = $sftp;

        return 0;
    }

    /**
     * Builds an ssh2.sftp:// URL for a remote path.
     *
     * The resource is converted with intval(), as in the PHP manual's ssh2_sftp()
     * example, instead of relying on resource-to-string interpolation.
     *
     * @param string $path Absolute remote path.
     * @return string
     */
    private function remoteUrl($path)
    {
        return 'ssh2.sftp://' . intval($this->sftp) . $path;
    }

    /**
     * Connects to the SFTP server and processes every file below remotePath:
     * download, upload to OSS, update the database, delete the local copy.
     *
     * @param mixed $ossConfig Passed unchanged to the OSS client constructor.
     * @return false|null false when the local directory cannot be created,
     *                    null otherwise.
     * @throws \Exception When no database client was supplied, the SSH
     *                    connection fails or the remote directory cannot be opened.
     */
    public function downLoadAndUpToOSS($ossConfig)
    {
        if ($this->mysql === null) {
            // Fail before touching the network instead of hitting a
            // "call to a member function on null" error inside a coroutine.
            throw new \Exception('No database client configured: pass it as the second constructor argument');
        }

        if ($this->connect() < 0) {
            throw new \Exception("连接ssh失败");
        }

        $this->ossClient = new OSS($ossConfig);

        // Create the local working directory before opening any remote handle,
        // so an early return cannot leave a handle open.
        if (!is_dir($this->localPath)
            && !mkdir($this->localPath, 0777, true)
            && !is_dir($this->localPath)
        ) {
            return false;
        }

        $dirHandle = opendir($this->remoteUrl($this->remotePath));
        if ($dirHandle === false) {
            throw new \Exception('Unable to open remote directory: ' . $this->remotePath);
        }

        try {
            $this->scanAndDownload($dirHandle, $this->remotePath);
        } finally {
            closedir($dirHandle);
        }
    }

    /**
     * Walks an already opened remote directory, recursing into sub-directories
     * and handing every regular file to transferFile().
     *
     * @param resource $dirHandle Directory handle returned by opendir().
     * @param string   $prefix    Remote path that $dirHandle points at.
     */
    private function scanAndDownload($dirHandle, $prefix)
    {
        while (false !== ($file = readdir($dirHandle))) {
            if ($file === '.' || $file === '..') {
                continue;
            }

            $remoteFile = $prefix . '/' . $file;
            $targetFile = $this->remoteUrl($remoteFile);

            if (is_file($targetFile)) {
                $this->transferFile($targetFile, $file);
            } elseif (is_dir($targetFile)) {
                $subHandle = opendir($targetFile);
                if ($subHandle === false) {
                    echo 'Skipping unreadable directory: ' . $remoteFile . PHP_EOL;
                    continue;
                }

                try {
                    $this->scanAndDownload($subHandle, $remoteFile);
                } finally {
                    closedir($subHandle);
                }
            }
        }
    }

    /**
     * Schedules one file for download, OSS upload and database update.
     *
     * @param string $targetFile ssh2.sftp:// URL of the remote file.
     * @param string $file       Bare file name; used as OSS key and local name.
     */
    private function transferFile($targetFile, $file)
    {
        $localRealFile = $this->localPath . '/' . $file;

        // Drop any stale copy left over from an earlier run.
        if (is_file($localRealFile)) {
            unlink($localRealFile);
        }

        go(function () use ($targetFile, $localRealFile, $file) {
            try {
                if (!copy($targetFile, $localRealFile)) {
                    // Do not upload a file that was never downloaded.
                    echo 'Download failed: ' . $file . PHP_EOL;
                    return;
                }

                $this->ossClient->uploadFile($file, $localRealFile);
                $this->updateContractPath($file);
            } finally {
                // Remove the temporary copy even if the upload or query threw.
                if (is_file($localRealFile)) {
                    unlink($localRealFile);
                }
            }
        });
    }

    /**
     * Stores the file name on the loan-apply row identified by the second
     * underscore-separated segment of the name.
     *
     * The file name comes from the remote server and is concatenated into SQL,
     * so it is validated first. The index must be all digits (assumed from the
     * column being compared unquoted in the statement), and names containing
     * quotes or backslashes are rejected.
     *
     * @param string $file File name; the segment after the first underscore is
     *                     used as the contract loan index.
     */
    private function updateContractPath($file)
    {
        $hasUnsafeChar = strpbrk($file, '"\'\\') !== false;
        if ($hasUnsafeChar || !preg_match('/^[^_]*_(\d+)_/', $file, $matches)) {
            echo 'Skipping database update, unexpected file name: ' . $file . PHP_EOL;
            return;
        }

        $sql = 'UPDATE `channel_contract_loan_apply` SET `contract_path` = "' . $file
            . '" WHERE `contract_loan_index` = ' . $matches[1];
        echo $sql . PHP_EOL;
        $this->mysql->query($sql);
    }
}

这里就是连接 sftp、读取目标目录下的文件,并且上传至 oss、更新数据库,中间有一部分业务处理。考虑到文件量比较大,一个目录有 10 万+ 个文件,我是有心理准备的,所以这里采用了 swoole 的 go 协程来处理耗时的操作。虽然考虑到了这一点,真的到线上运行的时候,发现程序直接卡住了。打开一个 10w+ 文件的目录,readdir 根本没办法搞定,一次性打开,根本不现实,直接会导致内存溢出。考虑到这一点,就得想办法。突然想到,php 有个迭代器(生成器,即 yield),平时比较少使用,可以用很少的内存遍历大文件或者大目录。于是乎,百度了一段。

/**
 * Lazily walks a directory tree and yields one path at a time.
 *
 * Entries whose name starts with a dot are skipped. Sub-directories are
 * descended into recursively; a directory's own path is yielded after its
 * contents, and only when $include_dirs is true.
 *
 * @param string $path         Directory to walk; trailing '/' and '*' are trimmed.
 * @param bool   $include_dirs Whether directory paths are yielded as well.
 * @return \Generator Yields full paths as strings; yields nothing when $path
 *                    is not readable or cannot be opened.
 */
private function glob2foreach($path, $include_dirs = false)
    {
        $path = rtrim($path, '/*');
        if (!is_readable($path)) {
            return;
        }

        $dh = opendir($path);
        if ($dh === false) {
            // opendir() can still fail after is_readable(); never pass `false` to readdir().
            return;
        }

        try {
            while (($file = readdir($dh)) !== false) {
                if (strncmp($file, '.', 1) === 0) {
                    continue;
                }

                $rfile = "{$path}/{$file}";

                if (!is_dir($rfile)) {
                    yield $rfile;
                    continue;
                }

                // Re-yield each child so the outer generator keeps its own sequential keys.
                foreach ($this->glob2foreach($rfile, $include_dirs) as $entry) {
                    yield $entry;
                }

                if ($include_dirs) {
                    yield $rfile;
                }
            }
        } finally {
            // Also runs when the consumer stops iterating early and the generator is destroyed.
            closedir($dh);
        }
    }

通过迭代器打开目录,并结合主进程中的

// Consume the generator one path at a time; foreach advances it after every pass.
$glob = $this->glob2foreach($dir);
foreach ($glob as $filename) {
	// $filename is the full path including directories; keep only the last segment.
	$segments = explode("/", $filename);
	$parseFile = end($segments);
	if ($parseFile === '') {
		// A path ending in '/' has no file name to work with.
		continue;
	}

	$localRealFile = $this->localPath . '/' . $parseFile;

	// Drop any stale copy left over from an earlier run.
	if (is_file($localRealFile)) {
		unlink($localRealFile);
	}

	go(function () use ($filename, $localRealFile, $parseFile) {
		try {
			if (!copy($filename, $localRealFile)) {
				// Do not upload a file that was never downloaded.
				echo 'Download failed: ' . $parseFile . PHP_EOL;
				return;
			}

			$this->ossClient->uploadFile($parseFile, $localRealFile);

			// TODO business logic
		} finally {
			// Remove the temporary copy even when the upload throws.
			if (is_file($localRealFile)) {
				unlink($localRealFile);
			}
		}
	});
}

这样便可以让php在这个超大的目录中,一个一个的去读取操作目标文件了。跟我的代码进行整合之后,果然有效。于是整理放出,方便给有同样问题的人一个参考

<?php
namespace app;

/**
 * Downloads every file below a remote SFTP directory and backs it up to OSS.
 *
 * The remote tree is walked with a generator, so entries are handled one at a
 * time instead of being collected up front. Relies on the ssh2 extension and
 * on Swoole's go() coroutine helper.
 */
class Sftp
{
    /** @var string SFTP host. */
    private $host = '';

    /** @var int SFTP port. */
    private $port = 22;

    /** @var string Login user. */
    private $user = '';

    /** @var string Login password. */
    private $password = '';

    /** @var string Remote directory to scan. */
    private $remotePath = '';

    /** @var string Local directory used as a temporary download area. */
    private $localPath = './data';

    /** @var resource|null SSH session returned by ssh2_connect(). */
    private $connection = null;

    /** @var resource|null SFTP subsystem returned by ssh2_sftp(). */
    private $sftp = null;

    /** @var object|null OSS client; created in downLoadAndUpToOSS(). */
    private $ossClient = null;

    /**
     * @param array $config Must contain an 'sftp' array with host, user and
     *                      password; port, remotePath and localPath fall back
     *                      to the property defaults when absent.
     */
    public function __construct($config)
    {
        $sftpConfig = $config['sftp'];

        $this->host = $sftpConfig['host'];
        $this->port = $sftpConfig['port'] ?? $this->port;
        $this->user = $sftpConfig['user'];
        $this->password = $sftpConfig['password'];
        $this->remotePath = $sftpConfig['remotePath'] ?? $this->remotePath;
        $this->localPath = $sftpConfig['localPath'] ?? $this->localPath;
    }

    /**
     * Opens the SSH session, authenticates and starts the SFTP subsystem.
     *
     * @return int 0 on success, -1 if the SFTP subsystem could not be started,
     *             -2 if authentication failed, -3 if the TCP/SSH connection failed.
     */
    private function connect()
    {
        $connection = ssh2_connect($this->host, $this->port);
        if ($connection === false) {
            // Without this guard ssh2_auth_password() would be handed `false`.
            return -3;
        }
        $this->connection = $connection;

        if (!ssh2_auth_password($this->connection, $this->user, $this->password)) {
            return -2;
        }

        $sftp = ssh2_sftp($this->connection);
        if (!$sftp) {
            return -1;
        }
        $this->sftp = $sftp;

        return 0;
    }

    /**
     * Builds an ssh2.sftp:// URL for a remote path.
     *
     * The resource is converted with intval(), as in the PHP manual's ssh2_sftp()
     * example, instead of relying on resource-to-string interpolation.
     *
     * @param string $path Absolute remote path.
     * @return string
     */
    private function remoteUrl($path)
    {
        return 'ssh2.sftp://' . intval($this->sftp) . $path;
    }

    /**
     * Connects to the SFTP server and, for every file below remotePath,
     * downloads it, uploads it to OSS and deletes the local copy.
     *
     * @param mixed $ossConfig Passed unchanged to the OSS client constructor.
     * @return false|null false when the local directory cannot be created,
     *                    null otherwise.
     * @throws \Exception When the SSH connection cannot be established.
     */
    public function downLoadAndUpToOSS($ossConfig)
    {
        if ($this->connect() < 0) {
            throw new \Exception("连接ssh失败");
        }

        $this->ossClient = new OSS($ossConfig);

        // Keep the remote URL in its own variable: the mkdir() result must not
        // overwrite it, or the scan would run against the wrong path.
        $remoteDir = $this->remoteUrl($this->remotePath);

        // Create the local working directory.
        if (!is_dir($this->localPath)
            && !mkdir($this->localPath, 0777, true)
            && !is_dir($this->localPath)
        ) {
            return false;
        }

        foreach ($this->glob2foreach($remoteDir) as $filename) {
            // $filename is the full URL including directories; keep the last segment.
            $segments = explode("/", $filename);
            $parseFile = end($segments);
            if ($parseFile === '') {
                continue;
            }

            $this->transferFile($filename, $parseFile);
        }
    }

    /**
     * Schedules one file for download and OSS upload.
     *
     * @param string $filename  ssh2.sftp:// URL of the remote file.
     * @param string $parseFile Bare file name; used as OSS key and local name.
     */
    private function transferFile($filename, $parseFile)
    {
        $localRealFile = $this->localPath . '/' . $parseFile;

        // Drop any stale copy left over from an earlier run.
        if (is_file($localRealFile)) {
            unlink($localRealFile);
        }

        go(function () use ($filename, $localRealFile, $parseFile) {
            try {
                if (!copy($filename, $localRealFile)) {
                    // Do not upload a file that was never downloaded.
                    echo 'Download failed: ' . $parseFile . PHP_EOL;
                    return;
                }

                $this->ossClient->uploadFile($parseFile, $localRealFile);

                // TODO business logic
            } finally {
                // Remove the temporary copy even when the upload throws.
                if (is_file($localRealFile)) {
                    unlink($localRealFile);
                }
            }
        });
    }

    /**
     * Lazily walks a directory tree and yields one path at a time.
     *
     * Entries whose name starts with a dot are skipped. Sub-directories are
     * descended into recursively; a directory's own path is yielded after its
     * contents, and only when $include_dirs is true.
     *
     * @param string $path         Directory to walk; trailing '/' and '*' are trimmed.
     * @param bool   $include_dirs Whether directory paths are yielded as well.
     * @return \Generator Yields full paths as strings; yields nothing when
     *                    $path is not readable or cannot be opened.
     */
    private function glob2foreach($path, $include_dirs = false)
    {
        $path = rtrim($path, '/*');
        if (!is_readable($path)) {
            return;
        }

        $dh = opendir($path);
        if ($dh === false) {
            // opendir() can still fail after is_readable(); never pass `false` to readdir().
            return;
        }

        try {
            while (($file = readdir($dh)) !== false) {
                if (strncmp($file, '.', 1) === 0) {
                    continue;
                }

                $rfile = "{$path}/{$file}";

                if (!is_dir($rfile)) {
                    yield $rfile;
                    continue;
                }

                // Re-yield each child so the outer generator keeps its own sequential keys.
                foreach ($this->glob2foreach($rfile, $include_dirs) as $entry) {
                    yield $entry;
                }

                if ($include_dirs) {
                    yield $rfile;
                }
            }
        } finally {
            // Also runs when the consumer stops iterating early and the generator is destroyed.
            closedir($dh);
        }
    }
}

如果觉得我的文章对您有用,请随意打赏。您的支持将鼓励我继续创作!