parent
1c45642789
commit
e9a718d57c
|
|
@ -5,7 +5,7 @@ namespace App\Imports;
|
|||
use App\Models\Asset;
|
||||
use App\Models\Registries\Registry;
|
||||
use App\Models\Registries\RegistryType;
|
||||
use App\Services\Documents\DocumentDownloadService;
|
||||
use App\Services\FileDownloadService;
|
||||
use Illuminate\Support\Collection;
|
||||
use Maatwebsite\Excel\Concerns\ToCollection;
|
||||
use Maatwebsite\Excel\Concerns\WithHeadingRow;
|
||||
|
|
@ -48,7 +48,7 @@ class NtdRegistryImport extends Import implements ToCollection, WithHeadingRow {
|
|||
}
|
||||
|
||||
public function download($url): ?Asset {
|
||||
return (new DocumentDownloadService())->download($url, 'registries/ntd');
|
||||
return (new FileDownloadService())->download($url, 'registries/ntd');
|
||||
}
|
||||
|
||||
public function checkLink($link) {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
namespace App\Services\Documents;
|
||||
|
||||
use App\Services\FileDownloadService;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
use Illuminate\Support\Str;
|
||||
|
||||
|
|
@ -36,6 +37,6 @@ class DocumentGeneratorService {
|
|||
}
|
||||
|
||||
public function makeAsset($path, $name) {
|
||||
return (new DocumentDownloadService())->makeAsset($path, $name);
|
||||
return (new FileDownloadService())->makeAsset($path, $name);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,18 +1,23 @@
|
|||
<?php
|
||||
|
||||
namespace App\Services\Documents;
|
||||
namespace App\Services;
|
||||
|
||||
use App\Models\Asset;
|
||||
use Illuminate\Support\Facades\Auth;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
|
||||
class DocumentDownloadService {
|
||||
protected array $mimes = [
|
||||
class FileDownloadService {
|
||||
protected array $documentMimes = [
|
||||
'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'pdf' => 'application/pdf',
|
||||
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
||||
];
|
||||
protected array $imageMimes = [
|
||||
'jpg' => 'image/jpg',
|
||||
'jpeg' => 'image/jpeg',
|
||||
'png' => 'image/png'
|
||||
];
|
||||
|
||||
|
||||
public function __construct() {
|
||||
|
|
@ -21,20 +26,25 @@ class DocumentDownloadService {
|
|||
|
||||
public function download($url, $dir = null, $filename = null): ?Asset {
|
||||
$info = pathinfo($url);
|
||||
if (!empty($this->mimes[$info['extension'] ?? null])) {
|
||||
$path = "public/documents";
|
||||
$filename = $filename ? "{$filename}.{$info['extension']}" : $info['basename'];
|
||||
$ext = $info['extension'] ?? null;
|
||||
if (!empty($this->documentMimes[$ext])) $path = 'public/documents';
|
||||
elseif (!empty($this->imageMimes[$ext])) $path = 'public/images';
|
||||
|
||||
if (!empty($path)) {echo("$url is trying to download\n");
|
||||
$filename = $filename ? "{$filename}.{$ext}" : $info['basename'];
|
||||
$path = $dir ? "{$path}/{$dir}/{$filename}" : "{$path}/{$filename}";
|
||||
$asset = Asset::query()->where(['path' => $path])->first();
|
||||
if (!$asset && Storage::put($path, Http::get($url)->body())) $asset = $this->makeAsset($path);
|
||||
elseif ($asset) var_dump($asset->path);
|
||||
if (!$asset && Storage::put($path, Http::get($url)->body())) {
|
||||
$asset = $this->makeAsset($path);
|
||||
echo("Downloaded {$asset->path}\n");
|
||||
} elseif ($asset) echo("{$asset->path} already exist\n");
|
||||
}
|
||||
return $asset ?? null;
|
||||
}
|
||||
public function makeAsset($path, $name = null) {
|
||||
$info = pathinfo($path);
|
||||
return Asset::create([
|
||||
'type' => 'document',
|
||||
'type' => !empty($this->documentMimes[$info['extension'] ?? null]) ? 'document' : 'image',
|
||||
'path' => $path,
|
||||
'mime' => $this->mimes[$info['extension']] ?? null,
|
||||
'name' => $name ?? $info['basename'],
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
<?php
|
||||
|
||||
namespace App\Services\Registries;
|
||||
|
||||
use App\Models\Pages\Page;
|
||||
use App\Models\Publications\PublicationType;
|
||||
use Illuminate\Support\Facades\Date;
|
||||
use Illuminate\Support\Str;
|
||||
use PHPHtmlParser\Dom;
|
||||
|
||||
/**
 * Imports news items from a faufcc.ru news listing page into the publications
 * of the '/press-tsentr/novosti' page.
 *
 * Uses $this->dom and $this->download() inherited from RegistryImportService.
 * NOTE(review): presumably the parent constructor loads the listing URL into
 * $this->dom — confirm in RegistryImportService.
 */
class NewsImportService extends RegistryImportService {
    /**
     * Placeholder kept for interface compatibility; intentionally does nothing.
     */
    public function test() {

    }

    /**
     * Walks every news article on the loaded listing page and creates or
     * updates the matching publication (matched by name), its poster image
     * and its HTML content section.
     *
     * Articles that lack the metadata dump, the detail link or a name are
     * skipped instead of aborting the whole run.
     */
    public function import() {
        $page = Page::byUrl('/press-tsentr/novosti');
        $nodes = $this->dom->find('article.pressRoomNews_article')->toArray();
        foreach ($nodes as $node) {
            $pre = $node->find('pre', 0);
            $link = $node->find('header a', 0);
            // Without the metadata dump or the detail link there is nothing usable to import.
            if (!$pre || !$link) continue;

            $serialized = $pre->text;
            $name = $this->extractField($serialized, 'NAME');
            // An empty name would make firstOrCreate() fold unrelated items into a single record.
            if ($name === null || $name === '') continue;
            $published_at = $this->extractField($serialized, 'ACTIVE_FROM');
            // Keep the empty-string fallback so the stored excerpt is never null.
            $excerpt = $this->extractField($serialized, 'PREVIEW_TEXT') ?? '';

            // The poster is optional: an article without an <img> is imported without one.
            $img = $node->find('img', 0);
            $asset = ($img && $img->src)
                ? $this->download(Str::replace('http://faufcc.ru.opt-images.1c-bitrix-cdn.ru', 'https://faufcc.ru', $img->src), 'publications/news')
                : null;

            $content = $this->parseContent("https://faufcc.ru{$link->href}");
            $model = $page->publications()->firstOrCreate(['name' => $name]);
            $model->update([
                'type' => PublicationType::NEWS,
                'published_at' => $published_at ? Date::create($published_at) : null,
                'excerpt' => $excerpt,
                'slug' => Str::slug($name),
                'poster_id' => $asset->id ?? null,
                'is_published' => true,
            ]);
            $section = $model->getObject('page-section-html', 'sections');
            $section->setValue('html-required', $content);
        }
    }

    /**
     * Loads a news detail page and returns the trimmed inner HTML of its
     * '.user-container' element after removing heading/image nodes.
     *
     * @param string $url Absolute URL of the detail page.
     * @return string The trimmed HTML, or an empty string when the page has no '.user-container'.
     */
    public function parseContent($url) {
        $dom = new Dom;
        $dom->loadFromUrl($url);
        $node = $dom->find('.user-container', 0);
        // A page without the expected container yields no content rather than a fatal error.
        if (!$node) return '';
        // NOTE(review): delete() on a find() result appears to be proxied to the first
        // matched node only — confirm whether every <h1>/<img> should be removed.
        if (($v = $node->find('h1')) && $v->count()) $v->delete();
        if (($v = $node->find('img')) && $v->count()) $v->delete();
        return trim($node->innerHTML);
    }

    /**
     * Extracts one value from the text dump embedded in an article's <pre>:
     * the text between "[KEY] =>" and the next "[".
     *
     * @param string $dump Text content of the <pre> element.
     * @param string $key  Field name without brackets, e.g. 'NAME'.
     * @return string|null Trimmed value, or null when the key is not present.
     */
    private function extractField(string $dump, string $key): ?string {
        $parts = explode("[{$key}] =>", $dump, 2);
        if (!isset($parts[1])) return null;
        return trim(explode('[', $parts[1], 2)[0]);
    }
}
|
||||
|
|
@ -4,7 +4,7 @@ namespace App\Services\Registries;
|
|||
|
||||
use App\Models\Asset;
|
||||
use App\Models\Registries\Registry;
|
||||
use App\Services\Documents\DocumentDownloadService;
|
||||
use App\Services\FileDownloadService;
|
||||
use Illuminate\Support\Str;
|
||||
use PHPHtmlParser\Dom;
|
||||
|
||||
|
|
@ -16,7 +16,8 @@ class RegistryImportService {
|
|||
protected array $mimes = [
|
||||
'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'pdf' => 'application/pdf',
|
||||
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
||||
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'jpg' => 'image/jpg'
|
||||
];
|
||||
|
||||
|
||||
|
|
@ -33,7 +34,7 @@ class RegistryImportService {
|
|||
if (empty($info['host'])) {
|
||||
$url = 'https://' . Str::replace('//', '/', "www.faufcc.ru/{$url}");
|
||||
}
|
||||
return (new DocumentDownloadService())->download($url, $dir, $filename);
|
||||
return (new FileDownloadService())->download($url, $dir, $filename);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -17,7 +17,7 @@ class CreateFieldHtmlValuesTable extends Migration
|
|||
$table->id();
|
||||
$table->integer('object_id')->index()->nullable();
|
||||
$table->integer('field_id')->index()->nullable();
|
||||
$table->text('value')->nullable();
|
||||
$table->mediumText('value')->nullable();
|
||||
$table->integer('ord')->index()->default(0);
|
||||
$table->timestamps();
|
||||
});
|
||||
|
|
|
|||
|
|
@ -62,8 +62,16 @@ Artisan::command('htmlparser:import-ts', function() {
|
|||
$service = new \App\Services\Registries\TechnicalCertificatesImportService($registry, "{$url}{$i}");
|
||||
$service->import();
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
// Console command: runs NewsImportService over news listing pages 74 through 88.
Artisan::command('htmlparser:import-news', function() {
    $baseUrl = 'https://www.faufcc.ru/_press-tsentr/novosti/?PAGEN_1=';
    foreach (range(74, 88) as $i) {
        echo "Parsing page {$i}\n";
        // One importer per listing page; the page number is appended to the query string.
        $importer = new \App\Services\Registries\NewsImportService(Registry::find(1), $baseUrl . $i);
        $importer->import();
    }
});
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue