Ruleset registry importer

master
Константин 2023-08-22 14:50:39 +03:00
parent b7218daef9
commit 8fb20882bb
10 changed files with 205 additions and 4 deletions

View File

@ -77,4 +77,13 @@ class Category extends Model {
} }
/**
 * Returns the child category with the given name, creating it when it does not exist yet.
 *
 * A newly created child receives this category's registry_id, and its `ord`
 * is then set to the value returned by its own getMaxOrd().
 *
 * @param string $name Name to look up among this category's children.
 * @return Model|null The existing or the freshly created child category.
 */
public function addCategory(string $name): ?Model {
    $existing = $this->children()->where(['name' => $name])->first();
    if ($existing) {
        return $existing;
    }

    $created = $this->children()->create(['registry_id' => $this->registry_id, 'name' => $name]);
    $created->update(['ord' => $created->getMaxOrd()]);

    return $created;
}
} }

View File

@ -123,6 +123,7 @@ class Entry extends Model {
/**
 * Renumbers the pivot `ord` of this entry's operations and refreshes the entry's validity period.
 *
 * Operations are fetched with the 'order-date' => 'desc' ordering applied
 * (presumably newest order first — confirm against applyOrders()). The
 * operation at position 0 also supplies the entry's active_since / active_till.
 */
public function sortOperations() {
    $sorted = $this->operations()->reorder()->applyOrders(['order-date' => 'desc'])->get();

    foreach ($sorted as $ord => $operation) {
        $this->objects()->updateExistingPivot($operation, ['ord' => $ord]);

        // Only the first operation in the ordering defines the entry's own dates.
        if ($ord === 0) {
            $this->update([
                'active_since' => $operation->value('active-since'),
                'active_till' => $operation->value('active-till'),
            ]);
        }
    }
}

View File

@ -54,4 +54,13 @@ class Registry extends Model {
} }
/**
 * Finds a category of this registry by name, creating it when none matches.
 *
 * After creation the category's `ord` is updated with the value returned by
 * its getMaxOrd().
 *
 * @param string $name Category name to find or create.
 * @return Model|null The matching or newly created category.
 */
public function addCategory(string $name): ?Model {
    $category = $this->categories()->where(['name' => $name])->first();

    if (!$category) {
        $category = $this->categories()->create(['name' => $name]);
        $maxOrd = $category->getMaxOrd();
        $category->update(['ord' => $maxOrd]);
    }

    return $category;
}
} }

View File

@ -0,0 +1,55 @@
<?php
namespace App\Services\Registries;
use App\Models\Asset;
use App\Models\Registries\Registry;
use Illuminate\Support\Facades\Auth;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Storage;
use PHPHtmlParser\Dom;
/**
 * Base class for services that import registry content from an external web page.
 *
 * The constructor loads the page at the given URL into a DOM parser. The helper
 * methods download linked documents into storage and register them as Asset records.
 */
class RegistryImportService {
    /** Registry the imported data is attached to. */
    protected Registry $registry;
    /** URL the DOM was loaded from. */
    protected string $url;
    /** Parsed DOM of the page at $url. */
    protected Dom $dom;
    /** Known document extensions (lower case) mapped to their MIME types. */
    protected array $mimes = [
        'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'pdf' => 'application/pdf',
        'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    ];

    /**
     * Stores the target registry and loads the source page into the DOM parser.
     *
     * @param Registry $registry Registry that receives the imported data.
     * @param string $url Page to load; note that loading happens immediately, here.
     */
    public function __construct(Registry $registry, string $url) {
        $this->registry = $registry;
        $this->url = $url;
        $this->dom = new Dom;
        $this->dom->loadFromUrl($url);
    }

    /**
     * Downloads a document into storage and returns the Asset that describes it.
     *
     * The storage path is "public/documents/registries[/{$dir}]/{basename of $url}".
     * When an Asset with that path already exists it is returned without
     * downloading again.
     *
     * @param string $url URL of the document to fetch.
     * @param string|null $dir Optional sub-directory below the registries folder.
     * @return Asset|null The existing or newly created asset; null when the
     *                    request was not successful or the file could not be stored.
     */
    public function download($url, $dir = null): ?Asset {
        $info = pathinfo($url);
        $base = "public/documents/registries";
        $path = $dir ? "{$base}/{$dir}/{$info['basename']}" : "{$base}/{$info['basename']}";

        $asset = Asset::query()->where(['path' => $path])->first();
        if ($asset) {
            return $asset;
        }

        // Do not persist the body of an error response (404/500 page) as a document.
        $response = Http::get($url);
        if (!$response->successful()) {
            return null;
        }

        return Storage::put($path, $response->body()) ? $this->makeAsset($path) : null;
    }

    /**
     * Creates a "document" Asset record for a file that is already in storage.
     *
     * @param string $path Storage path of the file.
     * @param string|null $name Display name; defaults to the file's basename.
     * @return Asset The created asset. The owner is the authenticated user, or
     *               null when nobody is authenticated.
     */
    public function makeAsset($path, $name = null) {
        $info = pathinfo($path);
        // pathinfo() omits the 'extension' key for names without a dot.
        $extension = $info['extension'] ?? null;
        $mime = $extension !== null ? ($this->mimes[strtolower($extension)] ?? null) : null;

        return Asset::create([
            'type' => 'document',
            'path' => $path,
            'mime' => $mime,
            'name' => $name ?? $info['basename'],
            'filename' => $info['basename'],
            'extension' => $extension,
            'user_id' => ($user = Auth::user()) ? $user->id : null
        ]);
    }
}

View File

@ -0,0 +1,115 @@
<?php
namespace App\Services\Registries;
use App\Models\Registries\Category;
use App\Models\Registries\Entry;
use Illuminate\Support\Facades\Date;
use Illuminate\Support\Str;
use PHPHtmlParser\Dom;
/**
 * Imports rulesets (categories, entries and their operations) from the HTML
 * page loaded by the parent service into the target registry.
 */
class RulesetImportService extends RegistryImportService {
    /**
     * Diagnostic helper: dumps the list-item names that occur more than once
     * inside the div elements of the '#sp_entyity' container.
     */
    public function test() {
        $names = [];
        foreach ($this->dom->find('#sp_entyity *')->toArray() as $node) {
            if ($node->tag->name() !== 'div') {
                continue;
            }
            foreach ($node->find('li') as $item) {
                $names[] = $this->trimText($item->text, '«» ');
            }
        }
        $res = collect(array_count_values($names))->filter(function($count) {
            return $count > 1;
        });
        // Intentional: this method exists only to print the duplicates.
        var_dump($res);
    }

    /**
     * Walks every descendant of '#sp_entyity' in document order and imports it.
     *
     * h2 elements become registry categories, h3 elements become subcategories
     * of the current category, and each li inside a div is imported as an entry
     * unless the registry already has an entry with that name. Entries go to the
     * current subcategory, or to the current category when no subcategory is open.
     * Progress is echoed to standard output.
     */
    public function import() {
        $category = null;
        $subcategory = null;
        foreach ($this->dom->find('#sp_entyity *')->toArray() as $node) {
            $tag = $node->tag->name();
            if ($tag === 'h2') {
                $category = $this->registry->addCategory($this->trimText($node->text, '«» '));
                $subcategory = null;
            } elseif ($tag === 'h3') {
                if ($category) $subcategory = $category->addCategory($this->trimText($node->text, '«» '));
            } elseif ($tag === 'div') {
                // A list directly under an h2 has no subcategory; fall back to the category.
                $this->importList($node, $subcategory ?? $category);
            }
        }
    }

    /**
     * Imports every list item of one container that is not in the registry yet.
     *
     * @param mixed $node DOM node whose li descendants are processed.
     * @param mixed $target Category that receives the entries; items are skipped
     *                      when this is not a Category instance.
     */
    private function importList($node, $target) {
        foreach ($node->find('li') as $item) {
            $entryName = $this->trimText($item->text, '«» ');
            if ($this->registry->entries()->where(['name' => $entryName])->exists()) {
                echo("Already imported {$entryName}\n");
                continue;
            }
            $link = $item->find('a', 0);
            // NOTE(review): find() with an index presumably yields null when nothing matches — confirm.
            if (!($target instanceof Category) || !$link) {
                echo("Skipping {$entryName}: no category or link\n");
                continue;
            }
            echo ("Importing {$entryName}\n");
            $this->importItem("{$this->url}{$link->href}", $target);
        }
    }

    /**
     * Loads one entry page and creates or updates the entry with its operations.
     *
     * The text of the 'p.ACTUALNAME a' link is split at the first '«' into the
     * entry number (before) and the entry name (after). Rows of 'table.inc1'
     * except the first one are imported as operations.
     *
     * @param string $url URL of the entry page.
     * @param Category $category Category the entry belongs to.
     */
    public function importItem(string $url, Category $category) {
        $dom = new Dom;
        $dom->loadFromUrl($url);
        $link = $dom->find('p.ACTUALNAME a', 0);
        if (!$link) {
            echo("No title link found at {$url}\n");
            return;
        }
        // Limit 2 keeps nested quotes inside the name; pad covers titles without '«'.
        [$number, $name] = array_pad(explode('«', (string) $link->text, 2), 2, '');
        $number = trim($number);
        $name = $this->trimText($name, '» ');
        $entry = $category->entries()->firstOrCreate(['registry_id' => $category->registry_id, 'number' => $number]);
        $entry->update(['name' => $name]);
        $this->importOperations($entry, array_slice($dom->find('table.inc1 tr')->toArray(), 1));
    }

    /**
     * Imports all given table rows as operations of the entry, then re-sorts them.
     *
     * @param Entry $entry Entry that owns the operations.
     * @param array $operations Table row nodes, one per operation.
     */
    public function importOperations(Entry $entry, array $operations) {
        foreach ($operations as $operation) {
            $this->importOperation($entry, $operation);
        }
        $entry->sortOperations();
    }

    /**
     * Imports a single operation from a five-column table row.
     *
     * Columns: 1 — operation type (with the entry number removed), 2 — order
     * and listings (see parseColumn2), 3 — active since, 4 — active till,
     * 5 — developer. An existing operation with the same order name is
     * updated; otherwise a new object is created. Rows with fewer than five
     * cells are ignored.
     *
     * @param Entry $entry Entry that owns the operation.
     * @param mixed $node Table row node.
     */
    public function importOperation(Entry $entry, $node) {
        $cells = $node->find('td')->toArray();
        if (count($cells) < 5) {
            return;
        }
        [$col1, $col2, $col3, $col4, $col5] = $cells;

        $type = trim(Str::replace(explode(' ', (string) $entry->number), '', $col1->text ?? ''), ': ');
        $data = [];
        $data['operation-type'] = ['title' => explode(' ', $type)[0]];
        $data['active-since'] = $col3->text ? Date::create($col3->text) : null;
        $data['active-till'] = $col4->text ? Date::create($col4->text) : null;
        $data['developer'] = Str::replace('&quot;', '"', $col5->text ?? '');
        $data['listings'] = [];
        $data['order-name'] = $data['order-date'] = $data['order-document'] = null;
        $data = $this->parseColumn2($col2, $data);

        $object = $entry->operations()->applyFilters(['order-name' => $data['order-name'] ?? null])->first();
        if (!$object) $object = $entry->createObject($entry->registry->options['operations'] ?? null, null, 'operations');
        $object->setValues($data);
    }

    /**
     * Extracts order name, order date, order document and listings from column 2.
     *
     * When the cell contains links, a link whose text splits into exactly two
     * parts at ' от ' is treated as the order (its target is downloaded as the
     * order document); every other link is added to 'listings'. Without links
     * the cell text itself is split at ' от ' into order name and order date.
     *
     * @param mixed $node Table cell node.
     * @param array $data Operation data collected so far.
     * @return array $data with the order-related keys filled in.
     */
    public function parseColumn2($node, $data): array {
        $links = $node->find('a')->toArray();
        if ($links) {
            foreach ($links as $link) {
                $parts = explode(' от ', (string) $link->text);
                if (count($parts) === 2) {
                    [$orderName, $orderDate] = $parts;
                    $data['order-name'] = $orderName;
                    $data['order-date'] = $orderDate ? Date::create($orderDate) : null;
                    $data['order-document'] = $this->download($link->href, 'ruleset');
                } else $data['listings'][] = ['name' => Str::replace('Постановление правительства №', 'pp', $link->text)];
            }
        } else {
            // Pad so that a cell without ' от ' yields a null date instead of an undefined offset.
            [$orderName, $orderDate] = array_pad(explode(' от ', (string) $node->text), 2, null);
            $data['order-name'] = $orderName;
            $data['order-date'] = $orderDate ? Date::create($orderDate) : null;
        }
        return $data;
    }

    /**
     * UTF-8 aware replacement for trim($text, $chars).
     *
     * trim() works on bytes: with a mask such as '«» ' it also strips the bytes
     * 0xAB / 0xBB, which are the trailing bytes of Cyrillic letters like 'Ы' and
     * 'л', cutting such a letter in half at the edge of the string.
     *
     * @param string|null $text Text to trim.
     * @param string $chars Characters to strip from both ends.
     * @return string The trimmed text; the input unchanged if it is not valid UTF-8.
     */
    private function trimText($text, string $chars): string {
        $text = (string) $text;
        $class = preg_quote($chars, '/');
        $trimmed = preg_replace("/^[{$class}]+|[{$class}]+$/u", '', $text);
        return $trimmed ?? $text;
    }
}

View File

@ -18,7 +18,7 @@ class CreateRegistryCategoriesTable extends Migration
$table->char('uuid', 36)->index()->unique(); $table->char('uuid', 36)->index()->unique();
$table->integer('registry_id')->index()->nullable(); $table->integer('registry_id')->index()->nullable();
$table->integer('parent_id')->index()->default(0); $table->integer('parent_id')->index()->default(0);
$table->string('name')->index()->nullable(); $table->string('name', 750)->index()->nullable();
$table->integer('ord')->index()->default(0); $table->integer('ord')->index()->default(0);
$table->timestamps(); $table->timestamps();
$table->softDeletes(); $table->softDeletes();

View File

@ -20,7 +20,7 @@ class CreateRegistryEntriesTable extends Migration
$table->integer('category_id')->index()->nullable(); $table->integer('category_id')->index()->nullable();
$table->integer('asset_id')->index()->nullable(); $table->integer('asset_id')->index()->nullable();
$table->string('number')->index()->nullable(); $table->string('number')->index()->nullable();
$table->string('name')->index()->nullable(); $table->string('name', 750)->index()->nullable();
$table->date('active_since')->index()->nullable(); $table->date('active_since')->index()->nullable();
$table->date('active_till')->index()->nullable(); $table->date('active_till')->index()->nullable();
$table->date('suspended_since')->index()->nullable(); $table->date('suspended_since')->index()->nullable();

View File

@ -22,7 +22,7 @@ class PagesTableSeeder extends Seeder
'Нормативные правовые акты' => ['type' => PageType::REGISTRY, 'registry_type' => RegistryType::SIMPLE], 'Нормативные правовые акты' => ['type' => PageType::REGISTRY, 'registry_type' => RegistryType::SIMPLE],
'Наблюдательный совет' => [], 'Наблюдательный совет' => [],
'Государственное задание' => [], 'Государственное задание' => [],
'Закупки' => ['type' => PageType::REGISTRY, 'registry_type' => RegistryType::SIMPLE], 'Закупки' => ['type' => PageType::REGISTRY, 'registry_type' => RegistryType::CATEGORIZED],
'Бухгалтерская отчетность' => [], 'Бухгалтерская отчетность' => [],
'Антимонопольное законодательство' => [], 'Антимонопольное законодательство' => [],
] ]

View File

@ -44,7 +44,7 @@ Route::group(['prefix' => 'objects'], function() {
Route::get('/', 'Api\Objects\ObjectsController@index'); Route::get('/', 'Api\Objects\ObjectsController@index');
Route::get('/{id}', 'Api\Objects\ObjectsController@show'); Route::get('/{id}', 'Api\Objects\ObjectsController@show');
Route::group(['middleware' => ['auth:api']], function() { Route::group(['middleware' => ['auth:api']], function() {
Route::put('objects/move/{id}', 'Api\Objects\ObjectsController@move'); Route::put('/move/{id}', 'Api\Objects\ObjectsController@move');
Route::delete('/{id}', 'Api\Objects\ObjectsController@destroy'); Route::delete('/{id}', 'Api\Objects\ObjectsController@destroy');
}); });
}); });

View File

@ -1,6 +1,9 @@
<?php <?php
use App\Models\Registries\Registry;
use App\Models\Registries\RegistryType;
use App\Models\User; use App\Models\User;
use App\Services\Registries\RulesetImportService;
use Illuminate\Support\Facades\Artisan; use Illuminate\Support\Facades\Artisan;
/* /*
@ -22,3 +25,12 @@ Artisan::command('dev:generate-personal-token {userId}', function ($userId) {
})->describe('Generates a personal access token for a user'); })->describe('Generates a personal access token for a user');
// Imports rulesets from the external formulary list into the first registry of type RULESET.
Artisan::command('htmlparser:import-rulesets', function() {
    $registry = Registry::query()->where(['type' => RegistryType::RULESET])->first();
    // Without a target registry the service constructor would fail with a TypeError.
    if (!$registry) {
        $this->error('No registry of type RULESET found; nothing to import into.');
        return 1;
    }
    $url = "https://faufcc.ru/technical-regulation-in-constuction/formulary-list/";
    $service = new RulesetImportService($registry, $url);
    $service->import();
    return 0;
})->describe('Imports rulesets from the faufcc.ru formulary list into the ruleset registry');