From e9a718d57ccc86cdd2bfec9cb0ec0a1ad98d58dd Mon Sep 17 00:00:00 2001 From: panabonic Date: Thu, 31 Aug 2023 01:10:59 +0300 Subject: [PATCH] fixes and updates by list from 29.08.2023 part 2 few fixes and updates --- app/Imports/NtdRegistryImport.php | 4 +- .../Documents/DocumentGeneratorService.php | 3 +- ...oadService.php => FileDownloadService.php} | 28 +++++++---- app/Services/Registries/NewsImportService.php | 47 +++++++++++++++++++ .../Registries/RegistryImportService.php | 7 +-- ..._162647_create_field_html_values_table.php | 2 +- routes/console.php | 10 +++- 7 files changed, 84 insertions(+), 17 deletions(-) rename app/Services/{Documents/DocumentDownloadService.php => FileDownloadService.php} (58%) create mode 100644 app/Services/Registries/NewsImportService.php diff --git a/app/Imports/NtdRegistryImport.php b/app/Imports/NtdRegistryImport.php index 2f43f5b..4150a3c 100644 --- a/app/Imports/NtdRegistryImport.php +++ b/app/Imports/NtdRegistryImport.php @@ -5,7 +5,7 @@ namespace App\Imports; use App\Models\Asset; use App\Models\Registries\Registry; use App\Models\Registries\RegistryType; -use App\Services\Documents\DocumentDownloadService; +use App\Services\FileDownloadService; use Illuminate\Support\Collection; use Maatwebsite\Excel\Concerns\ToCollection; use Maatwebsite\Excel\Concerns\WithHeadingRow; @@ -48,7 +48,7 @@ class NtdRegistryImport extends Import implements ToCollection, WithHeadingRow { } public function download($url): ?Asset { - return (new DocumentDownloadService())->download($url, 'registries/ntd'); + return (new FileDownloadService())->download($url, 'registries/ntd'); } public function checkLink($link) { diff --git a/app/Services/Documents/DocumentGeneratorService.php b/app/Services/Documents/DocumentGeneratorService.php index 8ea1b41..97d40be 100644 --- a/app/Services/Documents/DocumentGeneratorService.php +++ b/app/Services/Documents/DocumentGeneratorService.php @@ -2,6 +2,7 @@ namespace App\Services\Documents; +use App\Services\FileDownloadService; use Illuminate\Support\Facades\Storage; use Illuminate\Support\Str; @@ -36,6 +37,6 @@ class DocumentGeneratorService { } public function makeAsset($path, $name) { - return (new DocumentDownloadService())->makeAsset($path, $name); + return (new FileDownloadService())->makeAsset($path, $name); } } \ No newline at end of file diff --git a/app/Services/Documents/DocumentDownloadService.php b/app/Services/FileDownloadService.php similarity index 58% rename from app/Services/Documents/DocumentDownloadService.php rename to app/Services/FileDownloadService.php index a376c02..336f5e2 100644 --- a/app/Services/Documents/DocumentDownloadService.php +++ b/app/Services/FileDownloadService.php @@ -1,18 +1,23 @@ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'pdf' => 'application/pdf', 'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' ]; + protected array $imageMimes = [ + 'jpg' => 'image/jpg', + 'jpeg' => 'image/jpeg', + 'png' => 'image/png' + ]; public function __construct() { @@ -21,20 +26,25 @@ class DocumentDownloadService { public function download($url, $dir = null, $filename = null): ?Asset { $info = pathinfo($url); - if (!empty($this->mimes[$info['extension'] ?? null])) { - $path = "public/documents"; - $filename = $filename ? "{$filename}.{$info['extension']}" : $info['basename']; + $ext = $info['extension'] ?? null; + if (!empty($this->documentMimes[$ext])) $path = 'public/documents'; + elseif (!empty($this->imageMimes[$ext])) $path = 'public/images'; + + if (!empty($path)) {echo("$url is trying to download\n"); + $filename = $filename ? "{$filename}.{$ext}" : $info['basename']; $path = $dir ? "{$path}/{$dir}/{$filename}" : "{$path}/{$filename}"; $asset = Asset::query()->where(['path' => $path])->first(); - if (!$asset && Storage::put($path, Http::get($url)->body())) $asset = $this->makeAsset($path); - elseif ($asset) var_dump($asset->path); + if (!$asset && Storage::put($path, Http::get($url)->body())) { + $asset = $this->makeAsset($path); + echo("Downloaded {$asset->path}\n"); + } elseif ($asset) echo("{$asset->path} already exist\n"); } return $asset ?? null; } public function makeAsset($path, $name = null) { $info = pathinfo($path); return Asset::create([ - 'type' => 'document', + 'type' => !empty($this->documentMimes[$info['extension'] ?? null]) ? 'document' : 'image', 'path' => $path, 'mime' => $this->mimes[$info['extension']] ?? null, 'name' => $name ?? $info['basename'], diff --git a/app/Services/Registries/NewsImportService.php b/app/Services/Registries/NewsImportService.php new file mode 100644 index 0000000..86b720d --- /dev/null +++ b/app/Services/Registries/NewsImportService.php @@ -0,0 +1,47 @@ +dom->find('article.pressRoomNews_article')->toArray(); + foreach ($nodes as $node) { + $pre = $node->find('pre', 0); + $img = $node->find('img', 0); + $asset = $this->download(Str::replace('http://faufcc.ru.opt-images.1c-bitrix-cdn.ru', 'https://faufcc.ru', $img->src), 'publications/news'); + $link = $node->find('header a', 0); + $serialized = $pre->text; + $name = trim(explode('[', explode('[NAME] =>', $serialized)[1])[0]); + $published_at = trim(explode('[', explode('[ACTIVE_FROM] =>', $serialized)[1] ?? null)[0] ?? null); + $excerpt = trim(explode('[', explode('[PREVIEW_TEXT] =>', $serialized)[1] ?? null)[0] ?? null); + $content = $this->parseContent("https://faufcc.ru{$link->href}"); + $model = $page->publications()->firstOrCreate(['name' => $name]); + $model->update(['type' => PublicationType::NEWS, 'published_at' => $published_at ? Date::create($published_at) : null, + 'excerpt' => $excerpt, 'slug' => Str::slug($name), 'poster_id' => $asset->id ?? null, 'is_published' => true]); + $section = $model->getObject('page-section-html', 'sections'); + $section->setValue('html-required', $content); + } + } + + public function parseContent($url) { + $dom = new Dom; + $dom->loadFromUrl($url); + $node = $dom->find('.user-container', 0); + if (($v = $node->find('h1')) && $v->count()) $v->delete(); + if (($v = $node->find('img')) && $v->count()) $v->delete(); + return trim($node->innerHTML); + } + + +} \ No newline at end of file diff --git a/app/Services/Registries/RegistryImportService.php b/app/Services/Registries/RegistryImportService.php index c1d0019..f45d6bc 100644 --- a/app/Services/Registries/RegistryImportService.php +++ b/app/Services/Registries/RegistryImportService.php @@ -4,7 +4,7 @@ namespace App\Services\Registries; use App\Models\Asset; use App\Models\Registries\Registry; -use App\Services\Documents\DocumentDownloadService; +use App\Services\FileDownloadService; use Illuminate\Support\Str; use PHPHtmlParser\Dom; @@ -16,7 +16,8 @@ class RegistryImportService { protected array $mimes = [ 'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'pdf' => 'application/pdf', - 'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + 'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'jpg' => 'image/jpg' ]; @@ -33,7 +34,7 @@ class RegistryImportService { if (empty($info['host'])) { $url = 'https://' . Str::replace('//', '/', "www.faufcc.ru/{$url}"); } - return (new DocumentDownloadService())->download($url, $dir, $filename); + return (new FileDownloadService())->download($url, $dir, $filename); } } \ No newline at end of file diff --git a/database/migrations/2023_06_06_162647_create_field_html_values_table.php b/database/migrations/2023_06_06_162647_create_field_html_values_table.php index 9456a7d..cd88227 100644 --- a/database/migrations/2023_06_06_162647_create_field_html_values_table.php +++ b/database/migrations/2023_06_06_162647_create_field_html_values_table.php @@ -17,7 +17,7 @@ class CreateFieldHtmlValuesTable extends Migration $table->id(); $table->integer('object_id')->index()->nullable(); $table->integer('field_id')->index()->nullable(); - $table->text('value')->nullable(); + $table->mediumText('value')->nullable(); $table->integer('ord')->index()->default(0); $table->timestamps(); }); diff --git a/routes/console.php b/routes/console.php index 05c5c16..7f76592 100644 --- a/routes/console.php +++ b/routes/console.php @@ -62,8 +62,16 @@ Artisan::command('htmlparser:import-ts', function() { $service = new \App\Services\Registries\TechnicalCertificatesImportService($registry, "{$url}{$i}"); $service->import(); } +}); - +Artisan::command('htmlparser:import-news', function() { + $url = 'https://www.faufcc.ru/_press-tsentr/novosti/?PAGEN_1='; + for ($i = 74; $i <= 88; $i++) { + echo "Parsing page {$i}\n"; + $service = new \App\Services\Registries\NewsImportService(Registry::find(1), "{$url}{$i}"); + $service->import(); + } }); +