kyanag/revter

Maintainers

Details

github.com/kyanag/revter

Source

Issues

Installs: 3

Dependents: 0

Suggesters: 0

Security: 0

Stars: 0

Watchers: 0

Forks: 0

Open Issues: 0

Type:project

pkg:composer/kyanag/revter

dev-main 2025-09-23 15:29 UTC

This package is auto-updated.

Last update: 2025-09-23 15:31:32 UTC


README

ReverseRouter,用写 Web 的方式写爬虫

特点

  1. 用类似 Slim / Laravel 的路由风格编写爬虫处理代码
  2. 支持路由中间件
  3. 支持全局中间件
  4. ⚠️ 除路由和 PSR-7 依赖外,日志处理 / 事件处理 / 队列 / HTTP 请求等内容需自行实现 ⚠️

Vars 参数详情

  1. __dispatch_vars

代码见 zhihu.php

// Load the Composer autoloader; the path is resolved relative to this script's directory.
include __DIR__ . "/../vendor/autoload.php";

// Set the working directory for this run.
// NOTE(review): setcwd() and runtime_path() are not defined in this snippet — presumably
// project helpers; $path is not read again anywhere below.
$path = setcwd(runtime_path("zhihu"));

$app = new \Kyanag\Revter\App();

// JSONL data storage, configured with the current working directory.
// NOTE(review): Factory is referenced without a namespace or `use` import in this snippet —
// confirm its fully-qualified name before copying this code.
$storage = Factory::makeDataStorage("jsonl", [
    'dir' => getcwd(),
]);

// HTTP client
$client = new GuzzleHttp\Client([
    'base_uri' => 'https://www.zhihu.com/',
    'timeout'  => 5.0,
    // NOTE(review): 'verify' => false turns off TLS certificate verification in Guzzle;
    // acceptable for a local demo only, remove it for real crawling.
    'verify' => false,
    //'proxy' => "socks://127.0.0.1:10808",
]);

// Logger
$logger = Factory::makeLogger("zhihu", "app.log", true);

// Global middleware: pause for 1 second after each successfully handled request.
//
// $request is forwarded unchanged to $next and its result is returned. If anything
// downstream throws an \Exception, the failure is written to the log and null is
// returned instead (best-effort: the exception is not re-thrown); the 1-second pause
// is skipped in that case.
// NOTE(review): assumes $logger exposes the PSR-3 warning() method — confirm against
// whatever Factory::makeLogger() returns.
$app->addMiddleware(function ($request, $next) use ($logger) {
    try {
        $res = $next($request);
        sleep(1);
        return $res;
    } catch (\Exception $e) {
        // Record the failure instead of dropping it silently.
        $class = get_class($e);
        $logger->warning("[App::Middleware@error] {$class}: {$e->getMessage()}");
        return null;
    }
});

// Global middleware: write log lines before and after the rest of the pipeline runs.
$app->addMiddleware(function (\Psr\Http\Message\ServerRequestInterface $request, $next) use($client, $logger){
    /** @var \Kyanag\Revter\App $this */

    // "METHOD:URI" label shared by all three log lines.
    $target = "{$request->getMethod()}:{$request->getUri()}";

    $logger->info("任务 {$target} 开始");
    $logger->info("[App::Middleware@before] {$target}");

    // Optional: send the request here and attach the response as a request attribute.
    //    $response = $client->send($request);
    //    $request = $request->withAttribute("response", $response);

    $result = $next($request);
    $logger->info("[App::Middleware@after] {$target}\n\n");

    return $result;
});


// Register a route for question pages; the handler logs the request line,
// the captured question_id and a dump of the remaining route variables.
$app->on("/question/{question_id}", function(\Psr\Http\Message\ServerRequestInterface $request, $vars = []) use($logger){
    // Remove the '__route' and '__dispatch_vars' entries (presumably router internals)
    // so that only the captured placeholders are dumped.
    unset($vars['__route'], $vars['__dispatch_vars']);

    // Indent every continuation line of the dump by one tab.
    $dump = str_replace("\n", "\n\t", var_export($vars, true));

    $message = "{$request->getMethod()}:{$request->getUri()}\n"
        . "\tquestion_id = {$vars['question_id']}\n"
        . "\tvars = {$dump}";

    $logger->info($message);
});

// Route groups with a shared prefix are supported: routes registered on the
// collector are mounted under "/people/{uid}".
$app->group("/people/{uid}", function(\Kyanag\Revter\Core\RouteCollector $collector) use($logger){
    // Resulting pattern: "/people/{uid}/answers" (user profile, answers list).
    $collector->on("/answers", function(\Psr\Http\Message\ServerRequestInterface $request, $vars = []) use($logger){
        // Remove the '__route' and '__dispatch_vars' entries (presumably router internals)
        // so that only the captured placeholders are dumped.
        unset($vars['__route'], $vars['__dispatch_vars']);

        // Indent every continuation line of the dump by one tab.
        $dump = str_replace("\n", "\n\t", var_export($vars, true));

        $message = "{$request->getMethod()}:{$request->getUri()}\n"
            . "\tuid = {$vars['uid']}\n"
            . "\tvars = {$dump}";

        $logger->info($message);
    });
});

// Register a route for collection pages and attach a route-level middleware to it.
$app->on("/collection/{collection_id}", function(\Psr\Http\Message\ServerRequestInterface $request, $vars = []) use($logger){
    // Remove the '__route' and '__dispatch_vars' entries (presumably router internals)
    // so that only the captured placeholders are dumped.
    unset($vars['__route'], $vars['__dispatch_vars']);

    // Indent every continuation line of the dump by one tab.
    $dump = str_replace("\n", "\n\t", var_export($vars, true));

    $message = "{$request->getMethod()}:{$request->getUri()}\n"
        . "\tcollection_id = {$vars['collection_id']}\n"
        . "\tvars = {$dump}";

    $logger->info($message);
})->addMiddleware(function ($request, $vars, $next) use($logger){
    // Route middleware: unlike the global middleware it also receives the route
    // variables, and must forward both $request and $vars to $next.
    $collectionId = $vars['collection_id'];

    $logger->info("[Route::Middleware@before] collection_id = {$collectionId}");
    $result = $next($request, $vars);
    $logger->info("[Route::Middleware@after] collection_id = {$collectionId}");

    return $result;
});

/** @var \Kyanag\Revter\Libs\Queue\MemoryQueue $queue */
$queue = Factory::makeQueue("memory");

// Seed URLs, added in this order. The path of the zhuanlan.zhihu.com URL ("/p/...")
// matches none of the route patterns registered in this script.
foreach ([
    'https://www.zhihu.com/question/1936165649241076667',
    'https://zhuanlan.zhihu.com/p/676908347',
    'https://www.zhihu.com/collection/459061635',
] as $seedUrl) {
    $queue->addUrl($seedUrl);
}

// Run the app against the queue.
$app->run($queue);

输出如下

[2025-09-23 14:29:26] INFO: 任务 GET:https://www.zhihu.com/collection/459061635 开始
[2025-09-23 14:29:26] INFO: [App::Middleware@before] GET:https://www.zhihu.com/collection/459061635
[2025-09-23 14:29:26] INFO: [Route::Middleware@before] collection_id = 459061635
[2025-09-23 14:29:26] INFO: GET:https://www.zhihu.com/collection/459061635
        collection_id = 459061635
        vars = array (
          'collection_id' => '459061635',
        )
[2025-09-23 14:29:26] INFO: [Route::Middleware@after] collection_id = 459061635
[2025-09-23 14:29:27] INFO: [App::Middleware@after] GET:https://www.zhihu.com/collection/459061635


[2025-09-23 14:29:27] INFO: 任务 GET:https://zhuanlan.zhihu.com/p/676908347 开始
[2025-09-23 14:29:27] INFO: [App::Middleware@before] GET:https://zhuanlan.zhihu.com/p/676908347
[2025-09-23 14:29:27] INFO: [App::Middleware@after] GET:https://zhuanlan.zhihu.com/p/676908347


[2025-09-23 14:29:27] INFO: 任务 GET:https://www.zhihu.com/question/1936165649241076667 开始
[2025-09-23 14:29:27] INFO: [App::Middleware@before] GET:https://www.zhihu.com/question/1936165649241076667
[2025-09-23 14:29:27] INFO: GET:https://www.zhihu.com/question/1936165649241076667
        question_id = 1936165649241076667
        vars = array (
          'question_id' => '1936165649241076667',
        )
[2025-09-23 14:29:28] INFO: [App::Middleware@after] GET:https://www.zhihu.com/question/1936165649241076667