定制 kurozumi/web-scraper-bundle 二次开发

按需修改功能、优化性能、对接业务系统,提供一站式技术支持

邮箱:yvsm@zunyunkeji.com | QQ:316430983 | 微信:yvsm316

kurozumi/web-scraper-bundle

最新稳定版本:v6.1.5

Composer 安装命令:

composer require kurozumi/web-scraper-bundle

包简介

README 文档

README

A Symfony bundle for managing multiple web scrapers.

Install

composer req kurozumi/web-scraper-bundle

How to use

namespace App\Command;

use Kurozumi\WebScraperBundle\Service\Context;
use Kurozumi\WebScraperBundle\Service\Scraper\RssScraper;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Console command that collects entries from a fixed list of RSS feeds.
 *
 * Every feed URL is handed to Context::getData(); for results reported under
 * the RssScraper name, the title and link of each item are gathered and the
 * combined list is dumped with print_r().
 */
#[AsCommand(
    name: 'app:scraper'
)]
class ScraperCommand extends Command
{
    /**
     * @param Context $context Service queried for the scraped data of each feed URL.
     */
    public function __construct(private Context $context)
    {
        parent::__construct();
    }

    /**
     * Scrapes the configured feeds and prints the collected title/url pairs.
     *
     * @param InputInterface  $input  Console input (unused here).
     * @param OutputInterface $output Console output (unused here; print_r writes to stdout).
     *
     * @return int Always Command::SUCCESS.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $feeds = [
            'https://aaa.rss.xml',
            'https://bbb.rss.xml',
        ];

        $items = [];
        foreach ($feeds as $feed) {
            // The injected service lives in $this->context; the previous
            // "$this->content" referred to a property that does not exist.
            $data = $this->context->getData($feed);

            // Skip feeds that produced no data or were handled by a scraper
            // other than RssScraper.
            // NOTE(review): assumes $data['name'] is a class-name string — confirm.
            if (null === $data || RssScraper::class !== $data['name']) {
                continue;
            }

            // NOTE(review): each $item presumably exposes a DomCrawler-style
            // filter()/text() API — confirm against the bundle.
            foreach ($data['items'] as $item) {
                $items[] = [
                    'title' => $item->filter('title')->text(),
                    'url' => $item->filter('link')->text(),
                ];
            }
        }

        print_r($items);

        return Command::SUCCESS;
    }
}

Custom Scraper

Feed

<?php

namespace App\Service\Scraper;

final class YouTubeFeedScraper extends AbstractScraper
{
    /**
     * Fetches the document at $url and, when its root node is <feed>,
     * collects one item per matched "m|entry" node.
     *
     * @param string $url Address of the feed to fetch.
     * @return \ArrayIterator Items built by getItem(); empty when the root node is not "feed".
     * @throws \Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface
     * @throws \Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface
     * @throws \Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface
     * @throws \Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface
     */
    public function getItems(string $url): \ArrayIterator
    {
        $crawler = $this->getCrawler($this->getResponse($url));
        $collected = new \ArrayIterator();

        // Anything that is not a <feed> document yields an empty result.
        if ('feed' !== $crawler->nodeName()) {
            return $collected;
        }

        // NOTE(review): Symfony's Crawler::setDefaultNamespacePrefix() appears to
        // accept only the prefix; the second argument may be ignored — confirm.
        $crawler->setDefaultNamespacePrefix('m', 'http://search.yahoo.com/mrss/');

        $entries = $crawler->filter('m|entry');
        foreach ($entries as $entry) {
            $collected->append($this->getItem($entry));
        }

        return $collected;
    }
}

Html

<?php

namespace App\Service\Scraper;

final class HtmlScraper extends AbstractScraper
{
    /**
     * Fetches the document at $url and, when its root node is <html>,
     * collects one item per node matched by the "ul > li" selector.
     *
     * @param string $url Address of the page to fetch.
     * @return \ArrayIterator Items built by getItem(); empty when the root node is not "html".
     * @throws \Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface
     * @throws \Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface
     * @throws \Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface
     * @throws \Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface
     */
    public function getItems(string $url): \ArrayIterator
    {
        $crawler = $this->getCrawler($this->getResponse($url));
        $collected = new \ArrayIterator();

        // Anything that is not an <html> document yields an empty result.
        if ('html' !== $crawler->nodeName()) {
            return $collected;
        }

        $listEntries = $crawler->filter('ul > li');
        foreach ($listEntries as $listEntry) {
            $collected->append($this->getItem($listEntry));
        }

        return $collected;
    }
}

统计信息

  • 总下载量: 21
  • 月度下载量: 0
  • 日度下载量: 0
  • 收藏数: 0
  • 点击次数: 2
  • 依赖项目数: 0
  • 推荐数: 0

GitHub 信息

  • Stars: 0
  • Watchers: 1
  • Forks: 0
  • 开发语言: PHP

其他信息

  • 授权协议: MIT
  • 更新时间: 2022-10-05

承接程序开发

PHP开发

VUE

Vue开发

前端开发

小程序开发

公众号开发

系统定制

数据库设计

云部署

网站建设

安全加固