From 0a057d67c5adadfa9623c2169092b7a8c2b84767 Mon Sep 17 00:00:00 2001 From: panabonic Date: Sat, 2 Sep 2023 21:17:35 +0300 Subject: [PATCH 1/7] anticor registry import --- app/Models/Pages/Page.php | 2 +- app/Services/FileDownloadService.php | 5 +- .../Registries/AnticorImportService.php | 85 +++++++++++++++++++ .../Registries/RegistryImportService.php | 8 -- routes/console.php | 7 +- 5 files changed, 96 insertions(+), 11 deletions(-) create mode 100644 app/Services/Registries/AnticorImportService.php diff --git a/app/Models/Pages/Page.php b/app/Models/Pages/Page.php index f698192..aaf6108 100644 --- a/app/Models/Pages/Page.php +++ b/app/Models/Pages/Page.php @@ -118,7 +118,7 @@ class Page extends Model { public static function byUrl($url) { if ($url = trim($url, '/ ')) { $query = self::query(); - collect(explode('/', $url))->reverse()->values()->each(function($slug, $index) use ($query) { + collect(explode('/', $url))->reverse()->values()->each(function($slug, $index) use($query) { if ($slug !== '') { $index ? $query->nthParentSlug($index, $slug) : $query->bySlug($slug); } diff --git a/app/Services/FileDownloadService.php b/app/Services/FileDownloadService.php index 336f5e2..4bd187d 100644 --- a/app/Services/FileDownloadService.php +++ b/app/Services/FileDownloadService.php @@ -10,8 +10,11 @@ use Illuminate\Support\Facades\Storage; class FileDownloadService { protected array $documentMimes = [ 'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'doc' => 'application/msword', 'pdf' => 'application/pdf', - 'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + 'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'xls' => 'application/vnd.ms-excel', + 'pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation' ]; protected array $imageMimes = [ 'jpg' => 'image/jpg', diff --git a/app/Services/Registries/AnticorImportService.php b/app/Services/Registries/AnticorImportService.php new file mode 100644 index 0000000..e150663 --- /dev/null +++ b/app/Services/Registries/AnticorImportService.php @@ -0,0 +1,85 @@ +loadFromUrl($url); + $pages = $dom->find('.paging__item')->toArray(); + $key = null; + foreach ($pages as $k => $page) { + if ($page->class === 'paging__item active') $key = $k; + } + if (isset($key)) $next = $pages[$key + 1] ?? null; + $link = $next ? $next->find('a', 0) : null; + var_dump($link->href ?? null); + } + + public function import() { + $nodes = $this->dom->find('.user-container ul li a')->toArray(); + foreach ($nodes as $k => $node) { + //if ($k) return; + $category = $this->registry->addCategory(trim($node->text)); + $this->parseSubpage($node->href, $category); + $this->parseEntriesPage($node->href, $category); + } + } + + public function parseSubpage($url, Category $parent) { + $dom = new Dom; + $dom->loadFromUrl("https://faufcc.ru{$url}"); + $nodes = $dom->find('.user-container ul li a')->toArray(); + foreach ($nodes as $k => $node) { + $category = $parent->addCategory(trim($node->text)); + $this->parseEntriesPage($node->href, $category); + } + } + + + public function parseEntriesPage($url, Category $parent) { + $dom = new Dom; + $dom->loadFromUrl("https://faufcc.ru{$url}"); + $nodes = $dom->find('.user-container *')->toArray(); + foreach ($nodes as $k => $node) { + if ($node->tag->name() === 'h3') { + $category = $parent->addCategory(trim($node->text)); + } elseif ($node->tag->name() === 'table') { + $this->importEntries($category ?? $parent, $node->find('tr a')->toArray()); + } + } + if ($res = $this->getNextPage($dom)) $this->parseEntriesPage($res, $parent); + } + + + public function importEntries(Category $category, $items) { + foreach ($items as $item) { + $name = Str::limit(Str::replace('"', '"', trim($item->text)), 745); + $name = Str::replace('(', '«', $name); + $name = Str::replace(')', '»', $name); + $entry = $this->registry->entries()->firstOrCreate(['name' => $name, 'category_id' => $category->id ?? 0]); + $asset = $this->download($item->href, 'registries/anticor'); + $entry->update(['asset_id' => $asset->id ?? null]); + if (!$asset) $entry->update(['link' => $item->href]); + } + } + + + public function getNextPage($dom) { + $pages = $dom->find('.paging__item')->toArray(); + $key = null; + foreach ($pages as $k => $page) { + if ($page->class === 'paging__item active') $key = $k; + } + $next = isset($key) ? ($pages[$key + 1] ?? null) : null; + return $next ? $next->find('a', 0)->href : null; + } + + +} \ No newline at end of file diff --git a/app/Services/Registries/RegistryImportService.php b/app/Services/Registries/RegistryImportService.php index f45d6bc..fa61988 100644 --- a/app/Services/Registries/RegistryImportService.php +++ b/app/Services/Registries/RegistryImportService.php @@ -13,14 +13,6 @@ class RegistryImportService { protected string $url; protected Dom $dom; - protected array $mimes = [ - 'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - 'pdf' => 'application/pdf', - 'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', - 'jpg' => 'image/jpg' - ]; - - public function __construct(Registry $registry, string $url) { $this->registry = $registry; $this->url = $url; diff --git a/routes/console.php b/routes/console.php index 2e238d1..b78034c 100644 --- a/routes/console.php +++ b/routes/console.php @@ -73,5 +73,10 @@ Artisan::command('htmlparser:import-news', function() { } }); - +Artisan::command('htmlparser:import-anticor', function() { + $url = 'https://www.faufcc.ru/about-us/protivodeystvie-korruptsii/'; + $registry = \App\Models\Pages\Page::byUrl('/o-tsentre/protivodeistvie-korruptsii')->registry; + $service = new \App\Services\Registries\AnticorImportService($registry, $url); + $service->import(); +}); From 6fdca191e078b43bd742ff9bafa43ace18d08dc2 Mon Sep 17 00:00:00 2001 From: panabonic Date: Tue, 5 Sep 2023 11:08:45 +0300 Subject: [PATCH 2/7] publication name length fix --- app/Services/Forms/Publications/PublicationNewsForms.php | 2 +- app/Services/Forms/Publications/PublicationPhotosForms.php | 2 +- app/Services/Forms/Publications/PublicationPortfolioForms.php | 2 +- app/Services/Forms/Publications/PublicationSmiForms.php | 2 +- .../migrations/2023_06_07_153956_create_publications_table.php | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/Services/Forms/Publications/PublicationNewsForms.php b/app/Services/Forms/Publications/PublicationNewsForms.php index 16d8f21..bb1c555 100644 --- a/app/Services/Forms/Publications/PublicationNewsForms.php +++ b/app/Services/Forms/Publications/PublicationNewsForms.php @@ -39,7 +39,7 @@ class PublicationNewsForms extends FormsService { 'title' => 'Название', 'type' => FieldType::STRING, 'required' => true, - 'max_length' => 127, + 'max_length' => 750, 'value' => $model->name ?? null ], [ diff --git a/app/Services/Forms/Publications/PublicationPhotosForms.php b/app/Services/Forms/Publications/PublicationPhotosForms.php index 6d068e1..232794a 100644 --- a/app/Services/Forms/Publications/PublicationPhotosForms.php +++ b/app/Services/Forms/Publications/PublicationPhotosForms.php @@ -50,7 +50,7 @@ class PublicationPhotosForms 'title' => 'Название', 'type' => FieldType::STRING, 'required' => true, - 'max_length' => 127, + 'max_length' => 750, 'value' => $model->name ?? null ], [ diff --git a/app/Services/Forms/Publications/PublicationPortfolioForms.php b/app/Services/Forms/Publications/PublicationPortfolioForms.php index 55e97ad..ee87801 100644 --- a/app/Services/Forms/Publications/PublicationPortfolioForms.php +++ b/app/Services/Forms/Publications/PublicationPortfolioForms.php @@ -36,7 +36,7 @@ class PublicationPortfolioForms 'title' => 'Название компании', 'type' => FieldType::STRING, 'required' => true, - 'max_length' => 127, + 'max_length' => 750, 'value' => $model->name ?? null ], [ diff --git a/app/Services/Forms/Publications/PublicationSmiForms.php b/app/Services/Forms/Publications/PublicationSmiForms.php index 1311f13..73233dd 100644 --- a/app/Services/Forms/Publications/PublicationSmiForms.php +++ b/app/Services/Forms/Publications/PublicationSmiForms.php @@ -43,7 +43,7 @@ class PublicationSmiForms 'title' => 'Название', 'type' => FieldType::STRING, 'required' => true, - 'max_length' => 127, + 'max_length' => 750, 'value' => $model->name ?? null ], [ diff --git a/database/migrations/2023_06_07_153956_create_publications_table.php b/database/migrations/2023_06_07_153956_create_publications_table.php index 82fcd4e..6629065 100644 --- a/database/migrations/2023_06_07_153956_create_publications_table.php +++ b/database/migrations/2023_06_07_153956_create_publications_table.php @@ -21,7 +21,7 @@ class CreatePublicationsTable extends Migration $table->integer('author_id')->index()->nullable(); $table->string('slug')->index()->nullable(); $table->string('type')->index()->nullable(); - $table->string('name')->index()->nullable(); + $table->string('name', 750)->index()->nullable(); $table->text('excerpt')->nullable(); $table->text('params')->nullable(); $table->boolean('is_published')->index()->default(0); From e69b219e32d3d6390414d3f624e1316e0107bf8f Mon Sep 17 00:00:00 2001 From: panabonic Date: Thu, 7 Sep 2023 10:59:34 +0300 Subject: [PATCH 3/7] cleaner service --- app/Services/Registries/CleanerService.php | 40 ++++++++++++++++++++++ routes/console.php | 5 +++ 2 files changed, 45 insertions(+) create mode 100644 app/Services/Registries/CleanerService.php diff --git a/app/Services/Registries/CleanerService.php b/app/Services/Registries/CleanerService.php new file mode 100644 index 0000000..e08a2d9 --- /dev/null +++ b/app/Services/Registries/CleanerService.php @@ -0,0 +1,40 @@ +cleanPublications(); + $this->cleanRegistryEntries(); + } + + public function cleanPublications() { + $items = Publication::all(); + foreach ($items as $item) { + $item->update(['name' => $this->cleanString($item->name), 'excerpt' => $this->cleanString($item->excerpt)]); + } + } + + public function cleanRegistryEntries() { + $items = Entry::all(); + foreach ($items as $item) { + $item->update(['name' => $this->cleanString($item->name)]); + } + } + + + public function cleanString(string $string): string { + $string = Str::replace('"', '"', $string); + $string = Str::replace('(', '«', $string); + $string = Str::replace(')', '»', $string); + return trim(Str::replace('(***)', '', $string)); + } + +} \ No newline at end of file diff --git a/routes/console.php b/routes/console.php index b78034c..50ee834 100644 --- a/routes/console.php +++ b/routes/console.php @@ -28,6 +28,11 @@ Artisan::command('dev:generate-personal-token {userId}', function ($userId) { })->describe('Generates a personal access token for a user'); +Artisan::command('cleaner:clean', function() { + +}); + + Artisan::command('htmlparser:import-rulesets', function() { $registry = Registry::query()->where(['type' => RegistryType::RULESET])->first(); From 7a7c567b73a393185f229c66f98489af801bfd5b Mon Sep 17 00:00:00 2001 From: panabonic Date: Thu, 7 Sep 2023 11:01:26 +0300 Subject: [PATCH 4/7] cleaner service --- routes/console.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/routes/console.php b/routes/console.php index 50ee834..1391829 100644 --- a/routes/console.php +++ b/routes/console.php @@ -29,7 +29,8 @@ Artisan::command('dev:generate-personal-token {userId}', function ($userId) { Artisan::command('cleaner:clean', function() { - + $service = new \App\Services\Registries\CleanerService(); + $service->clean(); }); From 7d0a4fdba1bbd7d0dc68899d21cf9e8f25c5f558 Mon Sep 17 00:00:00 2001 From: panabonic Date: Thu, 7 Sep 2023 11:04:44 +0300 Subject: [PATCH 5/7] cleaner service --- app/Services/Registries/CleanerService.php | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/app/Services/Registries/CleanerService.php b/app/Services/Registries/CleanerService.php index e08a2d9..3366245 100644 --- a/app/Services/Registries/CleanerService.php +++ b/app/Services/Registries/CleanerService.php @@ -30,11 +30,14 @@ class CleanerService { } - public function cleanString(string $string): string { - $string = Str::replace('"', '"', $string); - $string = Str::replace('(', '«', $string); - $string = Str::replace(')', '»', $string); - return trim(Str::replace('(***)', '', $string)); + public function cleanString(?string $string): ?string { + if ($string) { + $string = Str::replace('"', '"', $string); + $string = Str::replace('(', '«', $string); + $string = Str::replace(')', '»', $string); + $string = trim(Str::replace('(***)', '', $string)); + } + return $string; } } \ No newline at end of file From 8112212c95eb3a072a5853d0973f0280c08fe4bb Mon Sep 17 00:00:00 2001 From: panabonic Date: Thu, 7 Sep 2023 11:15:16 +0300 Subject: [PATCH 6/7] minor fix --- app/Models/Publications/Publication.php | 2 +- config/files.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/Models/Publications/Publication.php b/app/Models/Publications/Publication.php index 7f4ddb3..4943ec9 100644 --- a/app/Models/Publications/Publication.php +++ b/app/Models/Publications/Publication.php @@ -86,7 +86,7 @@ class Publication extends Model { } public function getPublishDateRusAttribute(): string { - return $this->published_at ? $this->published_at->format('d') . ' ' . $this->published_at->getTranslatedMonthName('Do MMMM') . ' ' . $this->published_at->format('Y') . ' г.' : 'когда-то'; + return $this->published_at ? $this->published_at->format('d') . ' ' . $this->published_at->getTranslatedMonthName('Do MMMM') . ' ' . $this->published_at->format('Y') . ' г.' : 'Дата не указана'; } diff --git a/config/files.php b/config/files.php index 888e27c..e834ba8 100644 --- a/config/files.php +++ b/config/files.php @@ -1,5 +1,5 @@ 200 * 1024 * 1024 + 'maxsize' => 100 * 1024 * 1024 ]; From 41a7cd65cfd1b8ab06a05eb7596639b49baadf79 Mon Sep 17 00:00:00 2001 From: panabonic Date: Thu, 7 Sep 2023 11:27:15 +0300 Subject: [PATCH 7/7] cleaner update --- app/Services/Registries/CleanerService.php | 29 ++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/app/Services/Registries/CleanerService.php b/app/Services/Registries/CleanerService.php index 3366245..6db2517 100644 --- a/app/Services/Registries/CleanerService.php +++ b/app/Services/Registries/CleanerService.php @@ -2,6 +2,9 @@ namespace App\Services\Registries; +use App\Models\Objects\Values\HtmlValue; +use App\Models\Objects\Values\StringValue; +use App\Models\Objects\Values\TextValue; use App\Models\Publications\Publication; use App\Models\Registries\Entry; use Illuminate\Support\Str; @@ -11,8 +14,11 @@ class CleanerService { } public function clean() { - $this->cleanPublications(); - $this->cleanRegistryEntries(); + //$this->cleanPublications(); + //$this->cleanRegistryEntries(); + $this->cleanStringValues(); + $this->cleanTextValues(); + $this->cleanHtmlValues(); } public function cleanPublications() { @@ -29,6 +35,25 @@ class CleanerService { } } + public function cleanStringValues() { + $items = StringValue::all(); + foreach ($items as $item) { + $item->update(['value' => $this->cleanString($item->value)]); + } + } + public function cleanTextValues() { + $items = TextValue::all(); + foreach ($items as $item) { + $item->update(['value' => $this->cleanString($item->value)]); + } + } + public function cleanHtmlValues() { + $items = HtmlValue::all(); + foreach ($items as $item) { + $item->update(['value' => $this->cleanString($item->value)]); + } + } + public function cleanString(?string $string): ?string { if ($string) {