Fixes and updates per the list from 29.08.2023, part 2

A few fixes and updates
master
Константин 2023-08-31 01:10:59 +03:00
parent 1c45642789
commit e9a718d57c
7 changed files with 84 additions and 17 deletions

View File

@ -5,7 +5,7 @@ namespace App\Imports;
use App\Models\Asset;
use App\Models\Registries\Registry;
use App\Models\Registries\RegistryType;
use App\Services\Documents\DocumentDownloadService;
use App\Services\FileDownloadService;
use Illuminate\Support\Collection;
use Maatwebsite\Excel\Concerns\ToCollection;
use Maatwebsite\Excel\Concerns\WithHeadingRow;
@ -48,7 +48,7 @@ class NtdRegistryImport extends Import implements ToCollection, WithHeadingRow {
}
public function download($url): ?Asset {
return (new DocumentDownloadService())->download($url, 'registries/ntd');
return (new FileDownloadService())->download($url, 'registries/ntd');
}
public function checkLink($link) {

View File

@ -2,6 +2,7 @@
namespace App\Services\Documents;
use App\Services\FileDownloadService;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;
@ -36,6 +37,6 @@ class DocumentGeneratorService {
}
public function makeAsset($path, $name) {
return (new DocumentDownloadService())->makeAsset($path, $name);
return (new FileDownloadService())->makeAsset($path, $name);
}
}

View File

@ -1,18 +1,23 @@
<?php
namespace App\Services\Documents;
namespace App\Services;
use App\Models\Asset;
use Illuminate\Support\Facades\Auth;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Storage;
class DocumentDownloadService {
protected array $mimes = [
class FileDownloadService {
protected array $documentMimes = [
'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'pdf' => 'application/pdf',
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
];
protected array $imageMimes = [
'jpg' => 'image/jpg',
'jpeg' => 'image/jpeg',
'png' => 'image/png'
];
public function __construct() {
@ -21,20 +26,25 @@ class DocumentDownloadService {
public function download($url, $dir = null, $filename = null): ?Asset {
$info = pathinfo($url);
if (!empty($this->mimes[$info['extension'] ?? null])) {
$path = "public/documents";
$filename = $filename ? "{$filename}.{$info['extension']}" : $info['basename'];
$ext = $info['extension'] ?? null;
if (!empty($this->documentMimes[$ext])) $path = 'public/documents';
elseif (!empty($this->imageMimes[$ext])) $path = 'public/images';
if (!empty($path)) {echo("$url is trying to download\n");
$filename = $filename ? "{$filename}.{$ext}" : $info['basename'];
$path = $dir ? "{$path}/{$dir}/{$filename}" : "{$path}/{$filename}";
$asset = Asset::query()->where(['path' => $path])->first();
if (!$asset && Storage::put($path, Http::get($url)->body())) $asset = $this->makeAsset($path);
elseif ($asset) var_dump($asset->path);
if (!$asset && Storage::put($path, Http::get($url)->body())) {
$asset = $this->makeAsset($path);
echo("Downloaded {$asset->path}\n");
} elseif ($asset) echo("{$asset->path} already exist\n");
}
return $asset ?? null;
}
public function makeAsset($path, $name = null) {
$info = pathinfo($path);
return Asset::create([
'type' => 'document',
'type' => !empty($this->documentMimes[$info['extension'] ?? null]) ? 'document' : 'image',
'path' => $path,
'mime' => $this->mimes[$info['extension']] ?? null,
'name' => $name ?? $info['basename'],

View File

@ -0,0 +1,47 @@
<?php
namespace App\Services\Registries;
use App\Models\Pages\Page;
use App\Models\Publications\PublicationType;
use Illuminate\Support\Facades\Date;
use Illuminate\Support\Str;
use PHPHtmlParser\Dom;
/**
 * Imports news publications from a press-centre listing page.
 *
 * The listing DOM is read from $this->dom (presumably loaded by the parent
 * class from the URL handed to its constructor — confirm in
 * RegistryImportService). Each article is stored as a publication of the
 * page found by the URL "/press-tsentr/novosti".
 */
class NewsImportService extends RegistryImportService {
    /**
     * Empty stub; performs no work.
     */
    public function test() {
    }

    /**
     * Creates or updates one publication per `article.pressRoomNews_article`
     * node found in the loaded listing.
     *
     * For every article the method reads the dumped fields from its <pre>
     * element, downloads the poster image (when an <img> is present), parses
     * the detail page linked from the header and writes the result into the
     * publication and its "page-section-html" object.
     *
     * Articles without a [NAME] entry are skipped instead of aborting the
     * whole import, because the name is the lookup key for firstOrCreate().
     */
    public function import() {
        $page = Page::byUrl('/press-tsentr/novosti');
        $nodes = $this->dom->find('article.pressRoomNews_article')->toArray();
        foreach ($nodes as $node) {
            // find(..., 0) yields nothing when the element is absent, so every
            // lookup below is checked before it is dereferenced.
            $pre = $node->find('pre', 0);
            $serialized = $pre ? (string) $pre->text : '';

            $name = $this->extractField($serialized, 'NAME');
            if ($name === null || $name === '') {
                echo("Skipping news article without a name\n");
                continue;
            }
            $published_at = $this->extractField($serialized, 'ACTIVE_FROM');
            $excerpt = $this->extractField($serialized, 'PREVIEW_TEXT') ?? '';

            // The poster is optional: an article without <img> simply gets no poster_id.
            $img = $node->find('img', 0);
            $asset = $img
                ? $this->download(Str::replace('http://faufcc.ru.opt-images.1c-bitrix-cdn.ru', 'https://faufcc.ru', $img->src), 'publications/news')
                : null;

            // Without a header link there is no detail page to parse; the content stays empty.
            $link = $node->find('header a', 0);
            $content = $link ? $this->parseContent("https://faufcc.ru{$link->href}") : '';

            $model = $page->publications()->firstOrCreate(['name' => $name]);
            $model->update([
                'type' => PublicationType::NEWS,
                'published_at' => $published_at ? Date::create($published_at) : null,
                'excerpt' => $excerpt,
                'slug' => Str::slug($name),
                'poster_id' => $asset->id ?? null,
                'is_published' => true,
            ]);
            $section = $model->getObject('page-section-html', 'sections');
            $section->setValue('html-required', $content);
        }
    }

    /**
     * Loads a news detail page and returns the trimmed inner HTML of its
     * `.user-container` element with the heading and image removed.
     *
     * @param string $url Absolute URL of the detail page.
     * @return string Trimmed inner HTML, or an empty string when the page has
     *                no `.user-container` element.
     */
    public function parseContent($url) {
        $dom = new Dom;
        $dom->loadFromUrl($url);
        $node = $dom->find('.user-container', 0);
        if (!$node) return '';
        // NOTE(review): delete() is invoked on the find() result as a whole —
        // confirm whether the parser removes every match or only the first one.
        foreach (['h1', 'img'] as $selector) {
            $found = $node->find($selector);
            if ($found && $found->count()) $found->delete();
        }
        return trim($node->innerHTML);
    }

    /**
     * Extracts the value that follows "[KEY] =>" in the dumped text, up to
     * the next "[" character.
     *
     * @param string $serialized Text taken from the article's <pre> element.
     * @param string $key        Field name without the surrounding brackets.
     * @return string|null Trimmed value, or null when the key is not present.
     */
    private function extractField(string $serialized, string $key): ?string {
        $parts = explode("[{$key}] =>", $serialized, 2);
        if (count($parts) < 2) return null;
        return trim(explode('[', $parts[1], 2)[0]);
    }
}

View File

@ -4,7 +4,7 @@ namespace App\Services\Registries;
use App\Models\Asset;
use App\Models\Registries\Registry;
use App\Services\Documents\DocumentDownloadService;
use App\Services\FileDownloadService;
use Illuminate\Support\Str;
use PHPHtmlParser\Dom;
@ -16,7 +16,8 @@ class RegistryImportService {
protected array $mimes = [
'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'pdf' => 'application/pdf',
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'jpg' => 'image/jpg'
];
@ -33,7 +34,7 @@ class RegistryImportService {
if (empty($info['host'])) {
$url = 'https://' . Str::replace('//', '/', "www.faufcc.ru/{$url}");
}
return (new DocumentDownloadService())->download($url, $dir, $filename);
return (new FileDownloadService())->download($url, $dir, $filename);
}
}

View File

@ -17,7 +17,7 @@ class CreateFieldHtmlValuesTable extends Migration
$table->id();
$table->integer('object_id')->index()->nullable();
$table->integer('field_id')->index()->nullable();
$table->text('value')->nullable();
$table->mediumText('value')->nullable();
$table->integer('ord')->index()->default(0);
$table->timestamps();
});

View File

@ -62,8 +62,16 @@ Artisan::command('htmlparser:import-ts', function() {
$service = new \App\Services\Registries\TechnicalCertificatesImportService($registry, "{$url}{$i}");
$service->import();
}
});
// Imports news from listing pages 74 through 88 of the press-centre archive,
// running NewsImportService once per page.
Artisan::command('htmlparser:import-news', function() {
    $url = 'https://www.faufcc.ru/_press-tsentr/novosti/?PAGEN_1=';
    // The registry does not depend on the page number, so it is looked up
    // once instead of issuing the same query on every iteration.
    $registry = Registry::find(1);
    for ($i = 74; $i <= 88; $i++) {
        echo "Parsing page {$i}\n";
        $service = new \App\Services\Registries\NewsImportService($registry, "{$url}{$i}");
        $service->import();
    }
});