kyanag / revter
Installs: 3
Dependents: 0
Suggesters: 0
Security: 0
Stars: 0
Watchers: 0
Forks: 0
Open Issues: 0
Type:project
pkg:composer/kyanag/revter
Requires
- ext-json: *
- guzzlehttp/guzzle: ^6.5
- imangazaliev/didom: ^2.0
- monolog/monolog: ^2.10
- nikic/fast-route: 1.3
- php-di/php-di: ^6.3
- psr/http-message: ^1.0
- symfony/event-dispatcher: ^4.4
Requires (Dev)
- squizlabs/php_codesniffer: ^3.11
This package is auto-updated.
Last update: 2025-09-23 15:31:32 UTC
README
ReverseRouter,用写 Web 的方式写爬虫
特点
- 用类似 Slim / Laravel 的路由风格编写爬虫处理代码
- 支持路由中间件
- 支持全局中间件
- ⚠️除路由和 PSR-7 依赖外,日志处理 / 事件处理 / 队列 / HTTP 请求等内容需自行实现⚠️
Vars 参数详情
- __dispatch_vars
代码见 zhihu.php
include __DIR__ . "/../vendor/autoload.php";

// Example script: registers route handlers and middlewares on a Revter App,
// pushes three URLs into an in-memory queue, then runs the app on that queue.
// NOTE(review): `Factory`, `setcwd()` and `runtime_path()` are not defined or
// imported in this snippet — presumably project helpers; confirm their namespace.

// Set the working directory.
$path = setcwd(runtime_path("zhihu"));

$app = new \Kyanag\Revter\App();

// JSONL data storage.
$storage = Factory::makeDataStorage("jsonl", [
    'dir' => getcwd(),
]);

// HTTP client.
$client = new GuzzleHttp\Client([
    'base_uri' => 'https://www.zhihu.com/',
    'timeout' => 5.0,
    // NOTE(review): TLS certificate verification is disabled here; re-enable it
    // outside of local experiments.
    'verify' => false,
    //'proxy' => "socks://127.0.0.1:10808",
]);

// Logger.
$logger = Factory::makeLogger("zhihu", "app.log", true);

// Global middleware - delay 1 second after each successfully processed request.
$app->addMiddleware(function ($request, $next) use ($logger) {
    try {
        $res = $next($request);
        sleep(1);
        return $res;
    } catch (\Exception $e) {
        // Best-effort: keep processing the remaining requests, but record the
        // failure instead of discarding it silently.
        $logger->error("[App::Middleware@error] " . get_class($e) . ": {$e->getMessage()}");
        return null;
    }
});

// Global middleware - logging.
$app->addMiddleware(function (\Psr\Http\Message\ServerRequestInterface $request, $next) use ($client, $logger) {
    /** @var \Kyanag\Revter\App $this */
    $logger->info("任务 {$request->getMethod()}:{$request->getUri()} 开始");
    $logger->info("[App::Middleware@before] {$request->getMethod()}:{$request->getUri()}");

    // Send the request and inject the response into the request:
    // $response = $client->send($request);
    // $request = $request->withAttribute("response", $response);

    $res = $next($request);

    $logger->info("[App::Middleware@after] {$request->getMethod()}:{$request->getUri()}\n\n");
    return $res;
});

// Add a route.
$app->on("/question/{question_id}", function (\Psr\Http\Message\ServerRequestInterface $request, $vars = []) use ($logger) {
    // Question page.
    // Drop the internal entries so that only the route parameters are dumped.
    unset($vars['__route'], $vars['__dispatch_vars']);
    $res = str_replace("\n", "\n\t", var_export($vars, true));
    $logger->info(implode("", [
        "{$request->getMethod()}:{$request->getUri()}\n",
        "\tquestion_id = {$vars['question_id']}\n",
        "\tvars = {$res}"
    ]));
});

// Prefixed route groups are supported.
$app->group("/people/{uid}", function (\Kyanag\Revter\Core\RouteCollector $collector) use ($logger) {
    // => "/people/{uid}/answers"
    $collector->on("/answers", function (\Psr\Http\Message\ServerRequestInterface $request, $vars = []) use ($logger) {
        // User profile - answer list.
        unset($vars['__route'], $vars['__dispatch_vars']);
        $res = str_replace("\n", "\n\t", var_export($vars, true));
        $logger->info(implode("", [
            "{$request->getMethod()}:{$request->getUri()}\n",
            "\tuid = {$vars['uid']}\n",
            "\tvars = {$res}"
        ]));
    });
});

// Add a route and attach a route middleware to it.
$app->on("/collection/{collection_id}", function (\Psr\Http\Message\ServerRequestInterface $request, $vars = []) use ($logger) {
    // Collection page.
    unset($vars['__route'], $vars['__dispatch_vars']);
    $res = str_replace("\n", "\n\t", var_export($vars, true));
    $logger->info(implode("", [
        "{$request->getMethod()}:{$request->getUri()}\n",
        "\tcollection_id = {$vars['collection_id']}\n",
        "\tvars = {$res}"
    ]));
})->addMiddleware(function ($request, $vars, $next) use ($logger) {
    // Route middleware: receives the route vars in addition to the request.
    $logger->info("[Route::Middleware@before] collection_id = {$vars['collection_id']}");
    // Invoke $next directly, the same way the global middlewares do.
    $res = $next($request, $vars);
    $logger->info("[Route::Middleware@after] collection_id = {$vars['collection_id']}");
    return $res;
});

/** @var \Kyanag\Revter\Libs\Queue\MemoryQueue $queue */
$queue = Factory::makeQueue("memory");
$queue->addUrl('https://www.zhihu.com/question/1936165649241076667');
$queue->addUrl('https://zhuanlan.zhihu.com/p/676908347');
$queue->addUrl('https://www.zhihu.com/collection/459061635');

// Run.
$app->run($queue);
输出如下
[2025-09-23 14:29:26] INFO: 任务 GET:https://www.zhihu.com/collection/459061635 开始 [2025-09-23 14:29:26] INFO: [App::Middleware@before] GET:https://www.zhihu.com/collection/459061635 [2025-09-23 14:29:26] INFO: [Route::Middleware@before] collection_id = 459061635 [2025-09-23 14:29:26] INFO: GET:https://www.zhihu.com/collection/459061635 collection_id = 459061635 vars = array ( 'collection_id' => '459061635', ) [2025-09-23 14:29:26] INFO: [Route::Middleware@after] collection_id = 459061635 [2025-09-23 14:29:27] INFO: [App::Middleware@after] GET:https://www.zhihu.com/collection/459061635 [2025-09-23 14:29:27] INFO: 任务 GET:https://zhuanlan.zhihu.com/p/676908347 开始 [2025-09-23 14:29:27] INFO: [App::Middleware@before] GET:https://zhuanlan.zhihu.com/p/676908347 [2025-09-23 14:29:27] INFO: [App::Middleware@after] GET:https://zhuanlan.zhihu.com/p/676908347 [2025-09-23 14:29:27] INFO: 任务 GET:https://www.zhihu.com/question/1936165649241076667 开始 [2025-09-23 14:29:27] INFO: [App::Middleware@before] GET:https://www.zhihu.com/question/1936165649241076667 [2025-09-23 14:29:27] INFO: GET:https://www.zhihu.com/question/1936165649241076667 question_id = 1936165649241076667 vars = array ( 'question_id' => '1936165649241076667', ) [2025-09-23 14:29:28] INFO: [App::Middleware@after] GET:https://www.zhihu.com/question/1936165649241076667