# Patch overview (descriptive preamble placed before the first "diff --git" header; no hunk below is altered).
#
# What the visible hunks change:
#   - app/Models/Pages/Page.php: whitespace-only change in the closure's `use` clause inside Page::byUrl().
#   - app/Models/Publications/Publication.php: getPublishDateRusAttribute() returns a different fallback
#     string when published_at is falsy.
#   - app/Services/FileDownloadService.php: adds 'doc', 'xls' and 'pptx' entries to $documentMimes.
#   - app/Services/Forms/Publications/Publication{News,Photos,Portfolio,Smi}Forms.php: the name field's
#     'max_length' goes from 127 to 750.
#   - app/Services/Registries/AnticorImportService.php (new file): visible methods are import(),
#     parseSubpage(), parseEntriesPage(), importEntries() and getNextPage(). They load pages under
#     https://faufcc.ru, create categories from link/h3 text, create registry entries from table links
#     and follow the '.paging__item' element after the active one.
#   - app/Services/Registries/CleanerService.php (new file): visible methods are cleanPublications(),
#     cleanRegistryEntries(), cleanStringValues(), cleanTextValues(), cleanHtmlValues() and cleanString().
#     Each clean*() method loads every row of one model and updates it with cleanString() output.
#   - app/Services/Registries/RegistryImportService.php: removes the $mimes property.
#   - config/files.php: 'maxsize' appears to go from 200 * 1024 * 1024 to 100 * 1024 * 1024.
#   - database/migrations/2023_06_07_153956_create_publications_table.php: the 'name' column is declared
#     as string('name', 750).
#   - routes/console.php: registers the 'cleaner:clean' and 'htmlparser:import-anticor' commands.
#
# NOTE(review): in this copy the beginning of both new-file hunks (file header, namespace, class
#   declaration, first method signature) and of the config/files.php hunk is not visible -- confirm
#   against the real patch before relying on the notes below.
# NOTE(review): a var_dump($link->href ?? null) call is visible in AnticorImportService; it looks like
#   leftover debug output -- confirm it is intended to ship.
# NOTE(review): importEntries() calls $this->download(...), which is not defined in the visible part of
#   the hunk; presumably it comes from a parent class or trait in the truncated header -- confirm.
# NOTE(review): Str::replace('"', '"', ...) as shown replaces a character with itself; presumably an
#   HTML entity was decoded when this copy was produced -- verify the search string in the real file.
# NOTE(review): cleanString() replaces '(' and ')' before it tries to remove '(***)'; as shown, that
#   last replacement can no longer match -- verify the intended order (may also be a copy artefact).
# NOTE(review): getNextPage() reads ->href on the result of $next->find('a', 0) without a guard, and the
#   import-anticor command reads ->registry on the result of Page::byUrl(...) without a guard; confirm
#   neither call can yield null.
# NOTE(review): the clean*() methods use Model::all(), which loads whole tables at once; confirm table
#   sizes make that acceptable.
# NOTE(review): the column length is changed inside an existing create-table migration; databases that
#   already ran it would presumably not pick the change up without a separate migration -- confirm.
diff --git a/app/Models/Pages/Page.php b/app/Models/Pages/Page.php index f698192..aaf6108 100644 --- a/app/Models/Pages/Page.php +++ b/app/Models/Pages/Page.php @@ -118,7 +118,7 @@ class Page extends Model { public static function byUrl($url) { if ($url = trim($url, '/ ')) { $query = self::query(); - collect(explode('/', $url))->reverse()->values()->each(function($slug, $index) use ($query) { + collect(explode('/', $url))->reverse()->values()->each(function($slug, $index) use($query) { if ($slug !== '') { $index ? $query->nthParentSlug($index, $slug) : $query->bySlug($slug); } diff --git a/app/Models/Publications/Publication.php b/app/Models/Publications/Publication.php index 7f4ddb3..4943ec9 100644 --- a/app/Models/Publications/Publication.php +++ b/app/Models/Publications/Publication.php @@ -86,7 +86,7 @@ class Publication extends Model { } public function getPublishDateRusAttribute(): string { - return $this->published_at ? $this->published_at->format('d') . ' ' . $this->published_at->getTranslatedMonthName('Do MMMM') . ' ' . $this->published_at->format('Y') . ' г.' : 'когда-то'; + return $this->published_at ? $this->published_at->format('d') . ' ' . $this->published_at->getTranslatedMonthName('Do MMMM') . ' ' . $this->published_at->format('Y') . ' г.' 
: 'Дата не указана'; } diff --git a/app/Services/FileDownloadService.php b/app/Services/FileDownloadService.php index 336f5e2..4bd187d 100644 --- a/app/Services/FileDownloadService.php +++ b/app/Services/FileDownloadService.php @@ -10,8 +10,11 @@ use Illuminate\Support\Facades\Storage; class FileDownloadService { protected array $documentMimes = [ 'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'doc' => 'application/msword', 'pdf' => 'application/pdf', - 'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + 'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'xls' => 'application/vnd.ms-excel', + 'pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation' ]; protected array $imageMimes = [ 'jpg' => 'image/jpg', diff --git a/app/Services/Forms/Publications/PublicationNewsForms.php b/app/Services/Forms/Publications/PublicationNewsForms.php index 16d8f21..bb1c555 100644 --- a/app/Services/Forms/Publications/PublicationNewsForms.php +++ b/app/Services/Forms/Publications/PublicationNewsForms.php @@ -39,7 +39,7 @@ class PublicationNewsForms extends FormsService { 'title' => 'Название', 'type' => FieldType::STRING, 'required' => true, - 'max_length' => 127, + 'max_length' => 750, 'value' => $model->name ?? null ], [ diff --git a/app/Services/Forms/Publications/PublicationPhotosForms.php b/app/Services/Forms/Publications/PublicationPhotosForms.php index 6d068e1..232794a 100644 --- a/app/Services/Forms/Publications/PublicationPhotosForms.php +++ b/app/Services/Forms/Publications/PublicationPhotosForms.php @@ -50,7 +50,7 @@ class PublicationPhotosForms 'title' => 'Название', 'type' => FieldType::STRING, 'required' => true, - 'max_length' => 127, + 'max_length' => 750, 'value' => $model->name ?? 
null ], [ diff --git a/app/Services/Forms/Publications/PublicationPortfolioForms.php b/app/Services/Forms/Publications/PublicationPortfolioForms.php index 55e97ad..ee87801 100644 --- a/app/Services/Forms/Publications/PublicationPortfolioForms.php +++ b/app/Services/Forms/Publications/PublicationPortfolioForms.php @@ -36,7 +36,7 @@ class PublicationPortfolioForms 'title' => 'Название компании', 'type' => FieldType::STRING, 'required' => true, - 'max_length' => 127, + 'max_length' => 750, 'value' => $model->name ?? null ], [ diff --git a/app/Services/Forms/Publications/PublicationSmiForms.php b/app/Services/Forms/Publications/PublicationSmiForms.php index 1311f13..73233dd 100644 --- a/app/Services/Forms/Publications/PublicationSmiForms.php +++ b/app/Services/Forms/Publications/PublicationSmiForms.php @@ -43,7 +43,7 @@ class PublicationSmiForms 'title' => 'Название', 'type' => FieldType::STRING, 'required' => true, - 'max_length' => 127, + 'max_length' => 750, 'value' => $model->name ?? null ], [ diff --git a/app/Services/Registries/AnticorImportService.php b/app/Services/Registries/AnticorImportService.php new file mode 100644 index 0000000..e150663 --- /dev/null +++ b/app/Services/Registries/AnticorImportService.php @@ -0,0 +1,85 @@ +loadFromUrl($url); + $pages = $dom->find('.paging__item')->toArray(); + $key = null; + foreach ($pages as $k => $page) { + if ($page->class === 'paging__item active') $key = $k; + } + if (isset($key)) $next = $pages[$key + 1] ?? null; + $link = $next ? $next->find('a', 0) : null; + var_dump($link->href ?? 
null); + } + + public function import() { + $nodes = $this->dom->find('.user-container ul li a')->toArray(); + foreach ($nodes as $k => $node) { + //if ($k) return; + $category = $this->registry->addCategory(trim($node->text)); + $this->parseSubpage($node->href, $category); + $this->parseEntriesPage($node->href, $category); + } + } + + public function parseSubpage($url, Category $parent) { + $dom = new Dom; + $dom->loadFromUrl("https://faufcc.ru{$url}"); + $nodes = $dom->find('.user-container ul li a')->toArray(); + foreach ($nodes as $k => $node) { + $category = $parent->addCategory(trim($node->text)); + $this->parseEntriesPage($node->href, $category); + } + } + + + public function parseEntriesPage($url, Category $parent) { + $dom = new Dom; + $dom->loadFromUrl("https://faufcc.ru{$url}"); + $nodes = $dom->find('.user-container *')->toArray(); + foreach ($nodes as $k => $node) { + if ($node->tag->name() === 'h3') { + $category = $parent->addCategory(trim($node->text)); + } elseif ($node->tag->name() === 'table') { + $this->importEntries($category ?? $parent, $node->find('tr a')->toArray()); + } + } + if ($res = $this->getNextPage($dom)) $this->parseEntriesPage($res, $parent); + } + + + public function importEntries(Category $category, $items) { + foreach ($items as $item) { + $name = Str::limit(Str::replace('"', '"', trim($item->text)), 745); + $name = Str::replace('(', '«', $name); + $name = Str::replace(')', '»', $name); + $entry = $this->registry->entries()->firstOrCreate(['name' => $name, 'category_id' => $category->id ?? 0]); + $asset = $this->download($item->href, 'registries/anticor'); + $entry->update(['asset_id' => $asset->id ?? null]); + if (!$asset) $entry->update(['link' => $item->href]); + } + } + + + public function getNextPage($dom) { + $pages = $dom->find('.paging__item')->toArray(); + $key = null; + foreach ($pages as $k => $page) { + if ($page->class === 'paging__item active') $key = $k; + } + $next = isset($key) ? ($pages[$key + 1] ?? 
null) : null; + return $next ? $next->find('a', 0)->href : null; + } + + +} \ No newline at end of file diff --git a/app/Services/Registries/CleanerService.php b/app/Services/Registries/CleanerService.php new file mode 100644 index 0000000..6db2517 --- /dev/null +++ b/app/Services/Registries/CleanerService.php @@ -0,0 +1,68 @@ +cleanPublications(); + //$this->cleanRegistryEntries(); + $this->cleanStringValues(); + $this->cleanTextValues(); + $this->cleanHtmlValues(); + } + + public function cleanPublications() { + $items = Publication::all(); + foreach ($items as $item) { + $item->update(['name' => $this->cleanString($item->name), 'excerpt' => $this->cleanString($item->excerpt)]); + } + } + + public function cleanRegistryEntries() { + $items = Entry::all(); + foreach ($items as $item) { + $item->update(['name' => $this->cleanString($item->name)]); + } + } + + public function cleanStringValues() { + $items = StringValue::all(); + foreach ($items as $item) { + $item->update(['value' => $this->cleanString($item->value)]); + } + } + public function cleanTextValues() { + $items = TextValue::all(); + foreach ($items as $item) { + $item->update(['value' => $this->cleanString($item->value)]); + } + } + public function cleanHtmlValues() { + $items = HtmlValue::all(); + foreach ($items as $item) { + $item->update(['value' => $this->cleanString($item->value)]); + } + } + + + public function cleanString(?string $string): ?string { + if ($string) { + $string = Str::replace('"', '"', $string); + $string = Str::replace('(', '«', $string); + $string = Str::replace(')', '»', $string); + $string = trim(Str::replace('(***)', '', $string)); + } + return $string; + } + +} \ No newline at end of file diff --git a/app/Services/Registries/RegistryImportService.php b/app/Services/Registries/RegistryImportService.php index f45d6bc..fa61988 100644 --- a/app/Services/Registries/RegistryImportService.php +++ b/app/Services/Registries/RegistryImportService.php @@ -13,14 +13,6 @@ class 
RegistryImportService { protected string $url; protected Dom $dom; - protected array $mimes = [ - 'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - 'pdf' => 'application/pdf', - 'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', - 'jpg' => 'image/jpg' - ]; - - public function __construct(Registry $registry, string $url) { $this->registry = $registry; $this->url = $url; diff --git a/config/files.php b/config/files.php index 888e27c..e834ba8 100644 --- a/config/files.php +++ b/config/files.php @@ -1,5 +1,5 @@ 200 * 1024 * 1024 + 'maxsize' => 100 * 1024 * 1024 ]; diff --git a/database/migrations/2023_06_07_153956_create_publications_table.php b/database/migrations/2023_06_07_153956_create_publications_table.php index 82fcd4e..6629065 100644 --- a/database/migrations/2023_06_07_153956_create_publications_table.php +++ b/database/migrations/2023_06_07_153956_create_publications_table.php @@ -21,7 +21,7 @@ class CreatePublicationsTable extends Migration $table->integer('author_id')->index()->nullable(); $table->string('slug')->index()->nullable(); $table->string('type')->index()->nullable(); - $table->string('name')->index()->nullable(); + $table->string('name', 750)->index()->nullable(); $table->text('excerpt')->nullable(); $table->text('params')->nullable(); $table->boolean('is_published')->index()->default(0); diff --git a/routes/console.php b/routes/console.php index 2e238d1..1391829 100644 --- a/routes/console.php +++ b/routes/console.php @@ -28,6 +28,12 @@ Artisan::command('dev:generate-personal-token {userId}', function ($userId) { })->describe('Generates a personal access token for a user'); +Artisan::command('cleaner:clean', function() { + $service = new \App\Services\Registries\CleanerService(); + $service->clean(); +}); + + Artisan::command('htmlparser:import-rulesets', function() { $registry = Registry::query()->where(['type' => RegistryType::RULESET])->first(); @@ -73,5 +79,10 @@ 
Artisan::command('htmlparser:import-news', function() { } }); - +Artisan::command('htmlparser:import-anticor', function() { + $url = 'https://www.faufcc.ru/about-us/protivodeystvie-korruptsii/'; + $registry = \App\Models\Pages\Page::byUrl('/o-tsentre/protivodeistvie-korruptsii')->registry; + $service = new \App\Services\Registries\AnticorImportService($registry, $url); + $service->import(); +});