sergeybodin 2023-09-07 11:53:23 +03:00
commit f727628fd6
13 changed files with 177 additions and 18 deletions

View File

@ -118,7 +118,7 @@ class Page extends Model {
public static function byUrl($url) {
if ($url = trim($url, '/ ')) {
$query = self::query();
collect(explode('/', $url))->reverse()->values()->each(function($slug, $index) use ($query) {
collect(explode('/', $url))->reverse()->values()->each(function($slug, $index) use($query) {
if ($slug !== '') {
$index ? $query->nthParentSlug($index, $slug) : $query->bySlug($slug);
}

View File

@ -86,7 +86,7 @@ class Publication extends Model {
}
public function getPublishDateRusAttribute(): string {
return $this->published_at ? $this->published_at->format('d') . ' ' . $this->published_at->getTranslatedMonthName('Do MMMM') . ' ' . $this->published_at->format('Y') . ' г.' : 'когда-то';
return $this->published_at ? $this->published_at->format('d') . ' ' . $this->published_at->getTranslatedMonthName('Do MMMM') . ' ' . $this->published_at->format('Y') . ' г.' : 'Дата не указана';
}

View File

@ -10,8 +10,11 @@ use Illuminate\Support\Facades\Storage;
class FileDownloadService {
protected array $documentMimes = [
'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'doc' => 'application/msword',
'pdf' => 'application/pdf',
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'xls' => 'application/vnd.ms-excel',
'pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
];
protected array $imageMimes = [
'jpg' => 'image/jpg',

View File

@ -39,7 +39,7 @@ class PublicationNewsForms extends FormsService {
'title' => 'Название',
'type' => FieldType::STRING,
'required' => true,
'max_length' => 127,
'max_length' => 750,
'value' => $model->name ?? null
],
[

View File

@ -50,7 +50,7 @@ class PublicationPhotosForms
'title' => 'Название',
'type' => FieldType::STRING,
'required' => true,
'max_length' => 127,
'max_length' => 750,
'value' => $model->name ?? null
],
[

View File

@ -36,7 +36,7 @@ class PublicationPortfolioForms
'title' => 'Название компании',
'type' => FieldType::STRING,
'required' => true,
'max_length' => 127,
'max_length' => 750,
'value' => $model->name ?? null
],
[

View File

@ -43,7 +43,7 @@ class PublicationSmiForms
'title' => 'Название',
'type' => FieldType::STRING,
'required' => true,
'max_length' => 127,
'max_length' => 750,
'value' => $model->name ?? null
],
[

View File

@ -0,0 +1,85 @@
<?php
namespace App\Services\Registries;
use App\Models\Registries\Category;
use Illuminate\Support\Str;
use PHPHtmlParser\Dom;
/**
 * Imports the anti-corruption documents section of faufcc.ru into a registry.
 *
 * The section is scraped as a tree: links on the landing page become top-level
 * categories, links on each sub-page become child categories, and every
 * entries page contributes registry entries (optionally grouped by <h3>
 * headings), following the pager until there is no next page.
 */
class AnticorImportService extends RegistryImportService {
    /** Origin prepended to site-relative hrefs found on scraped pages. */
    protected const ANTICOR_BASE_URL = 'https://faufcc.ru';

    /**
     * Debug helper: loads a fixed paginated listing and dumps the href of the
     * page that follows the active one (or NULL when there is none).
     */
    public function test() {
        $url = "https://www.faufcc.ru/about-us/protivodeystvie-korruptsii/normativnye-pravovye-akty/vedomstvennye-normativnye/index.php?PAGEN_1=3";
        $dom = new Dom;
        $dom->loadFromUrl($url);
        // Reuse the shared pager logic; the previous inline copy read $next
        // while it was undefined whenever no active pager item was found.
        var_dump($this->getNextPage($dom));
    }

    /**
     * Entry point: creates one top-level category per link on the landing
     * page, then imports both its sub-categories and its own entries.
     */
    public function import() {
        $nodes = $this->dom->find('.user-container ul li a')->toArray();
        foreach ($nodes as $node) {
            $category = $this->registry->addCategory(trim($node->text));
            $this->parseSubpage($node->href, $category);
            $this->parseEntriesPage($node->href, $category);
        }
    }

    /**
     * Creates a child category for every link listed on a sub-page and
     * imports the entries found behind each link.
     *
     * @param string   $url    Href of the sub-page (site-relative in practice).
     * @param Category $parent Category the new child categories are attached to.
     */
    public function parseSubpage($url, Category $parent) {
        $dom = $this->loadAnticorPage($url);
        $nodes = $dom->find('.user-container ul li a')->toArray();
        foreach ($nodes as $node) {
            $category = $parent->addCategory(trim($node->text));
            $this->parseEntriesPage($node->href, $category);
        }
    }

    /**
     * Imports the entries of one listing page and recurses into the next
     * page of the pager, if any.
     *
     * Elements are walked in document order: an <h3> opens a new child
     * category, and each following <table> is imported into the most recent
     * one (or into $parent when no heading has been seen on this page).
     *
     * @param string   $url    Href of the listing page.
     * @param Category $parent Category that owns the entries / headings.
     */
    public function parseEntriesPage($url, Category $parent) {
        $dom = $this->loadAnticorPage($url);
        $category = null;
        foreach ($dom->find('.user-container *')->toArray() as $node) {
            $tag = $node->tag->name();
            if ($tag === 'h3') {
                $category = $parent->addCategory(trim($node->text));
            } elseif ($tag === 'table') {
                $this->importEntries($category ?? $parent, $node->find('tr a')->toArray());
            }
        }
        if ($next = $this->getNextPage($dom)) $this->parseEntriesPage($next, $parent);
    }

    /**
     * Creates (or reuses) a registry entry for every link and attaches the
     * downloaded file to it; when the download yields no asset the original
     * href is stored as a plain link instead.
     *
     * @param Category $category Category the entries belong to.
     * @param iterable $items    Anchor nodes exposing ->text and ->href.
     */
    public function importEntries(Category $category, $items) {
        foreach ($items as $item) {
            // Decode entities BEFORE truncating so the limit can never cut an
            // entity in half and leave a fragment such as "&#4..." in the name.
            // NOTE(review): &#40; / &#41; are the numeric entities for "(" and ")";
            // the mapping to guillemets is kept exactly as it was - confirm it is intended.
            $name = str_replace(['&quot;', '&#40;', '&#41;'], ['"', '«', '»'], trim($item->text));
            $name = Str::limit($name, 745);
            $entry = $this->registry->entries()->firstOrCreate(['name' => $name, 'category_id' => $category->id ?? 0]);
            $asset = $this->download($item->href, 'registries/anticor');
            // Single write per entry: asset_id is always refreshed, link only as a fallback.
            $changes = ['asset_id' => $asset->id ?? null];
            if (!$asset) $changes['link'] = $item->href;
            $entry->update($changes);
        }
    }

    /**
     * Finds the href of the pager item that follows the active one.
     *
     * @param Dom $dom Parsed listing page.
     * @return string|null Href of the next page, or null when there is no
     *                     active item, no following item, or it has no link.
     */
    public function getNextPage($dom) {
        $pages = $dom->find('.paging__item')->toArray();
        $key = null;
        foreach ($pages as $k => $page) {
            // Matches the exact class attribute value used for the current page.
            if ($page->class === 'paging__item active') $key = $k;
        }
        $next = isset($key) ? ($pages[$key + 1] ?? null) : null;
        $link = $next ? $next->find('a', 0) : null;
        // Explicit null check instead of `??`: the null-coalescing operator on a
        // magic property only works if the node class implements __isset().
        return $link ? $link->href : null;
    }

    /**
     * Loads a page of the source site into a fresh DOM.
     *
     * Site-relative hrefs are prefixed with the site origin; hrefs that are
     * already absolute are used unchanged.
     *
     * @param string $url Href taken from a scraped anchor.
     */
    private function loadAnticorPage($url): Dom {
        $target = Str::startsWith((string) $url, ['http://', 'https://'])
            ? $url
            : self::ANTICOR_BASE_URL . $url;
        $dom = new Dom;
        $dom->loadFromUrl($target);
        return $dom;
    }
}

View File

@ -0,0 +1,68 @@
<?php
namespace App\Services\Registries;
use App\Models\Objects\Values\HtmlValue;
use App\Models\Objects\Values\StringValue;
use App\Models\Objects\Values\TextValue;
use App\Models\Publications\Publication;
use App\Models\Registries\Entry;
use Illuminate\Support\Str;
/**
 * One-off maintenance service that strips leftover HTML entities and
 * placeholder markers from text already stored in the database.
 */
class CleanerService {
    /** Number of rows loaded per batch while cleaning a table. */
    private const CHUNK_SIZE = 500;

    public function __construct() {
    }

    /**
     * Runs the currently enabled cleaning passes (string, text and HTML
     * object values). Publication and registry-entry passes are available
     * as separate methods but are intentionally not invoked here.
     */
    public function clean() {
        //$this->cleanPublications();
        //$this->cleanRegistryEntries();
        $this->cleanStringValues();
        $this->cleanTextValues();
        $this->cleanHtmlValues();
    }

    /** Cleans the name and excerpt of every publication. */
    public function cleanPublications() {
        $this->cleanModelAttributes(Publication::class, ['name', 'excerpt']);
    }

    /** Cleans the name of every registry entry. */
    public function cleanRegistryEntries() {
        $this->cleanModelAttributes(Entry::class, ['name']);
    }

    /** Cleans the value of every stored string value. */
    public function cleanStringValues() {
        $this->cleanModelAttributes(StringValue::class, ['value']);
    }

    /** Cleans the value of every stored text value. */
    public function cleanTextValues() {
        $this->cleanModelAttributes(TextValue::class, ['value']);
    }

    /** Cleans the value of every stored HTML value. */
    public function cleanHtmlValues() {
        $this->cleanModelAttributes(HtmlValue::class, ['value']);
    }

    /**
     * Normalises a stored string: decodes &quot;, turns the numeric entities
     * 40/41 into guillemets, removes the "(***)" marker and trims the result.
     *
     * NOTE(review): &#40; / &#41; are the entities for "(" and ")"; the mapping
     * to guillemets is preserved from the original code - confirm it is intended.
     *
     * @param string|null $string Raw value; null and empty values are returned unchanged.
     * @return string|null Cleaned value.
     */
    public function cleanString(?string $string): ?string {
        if ($string) {
            // Full entities (with ';') are replaced first so no stray semicolon
            // is left behind; the bare forms are still handled afterwards for
            // data that was stored without the terminator.
            $string = str_replace(
                ['&quot;', '&#40;', '&#41;', '&#40', '&#41', '(***)'],
                ['"', '«', '»', '«', '»', ''],
                $string
            );
            $string = trim($string);
        }
        return $string;
    }

    /**
     * Applies cleanString() to the given attributes of every row of a model.
     *
     * Rows are processed in primary-key-ordered chunks so the whole table is
     * never held in memory at once.
     *
     * @param string   $modelClass Fully qualified Eloquent model class name.
     * @param string[] $attributes Attribute names to clean.
     */
    private function cleanModelAttributes(string $modelClass, array $attributes): void {
        $modelClass::query()->chunkById(self::CHUNK_SIZE, function ($items) use ($attributes) {
            foreach ($items as $item) {
                $changes = [];
                foreach ($attributes as $attribute) {
                    $changes[$attribute] = $this->cleanString($item->{$attribute});
                }
                $item->update($changes);
            }
        });
    }
}

View File

@ -13,14 +13,6 @@ class RegistryImportService {
protected string $url;
protected Dom $dom;
protected array $mimes = [
'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'pdf' => 'application/pdf',
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'jpg' => 'image/jpg'
];
public function __construct(Registry $registry, string $url) {
$this->registry = $registry;
$this->url = $url;

View File

@ -1,5 +1,5 @@
<?php
return [
'maxsize' => 200 * 1024 * 1024
'maxsize' => 100 * 1024 * 1024
];

View File

@ -21,7 +21,7 @@ class CreatePublicationsTable extends Migration
$table->integer('author_id')->index()->nullable();
$table->string('slug')->index()->nullable();
$table->string('type')->index()->nullable();
$table->string('name')->index()->nullable();
$table->string('name', 750)->index()->nullable();
$table->text('excerpt')->nullable();
$table->text('params')->nullable();
$table->boolean('is_published')->index()->default(0);

View File

@ -28,6 +28,12 @@ Artisan::command('dev:generate-personal-token {userId}', function ($userId) {
})->describe('Generates a personal access token for a user');
// Runs every cleaning pass enabled in CleanerService::clean().
Artisan::command('cleaner:clean', function() {
    $service = new \App\Services\Registries\CleanerService();
    $service->clean();
})->describe('Cleans leftover HTML entities and placeholder markers from stored text values');
Artisan::command('htmlparser:import-rulesets', function() {
$registry = Registry::query()->where(['type' => RegistryType::RULESET])->first();
@ -73,5 +79,10 @@ Artisan::command('htmlparser:import-news', function() {
}
});
// Scrapes the anti-corruption section of faufcc.ru into the registry attached
// to the local "/o-tsentre/protivodeistvie-korruptsii" page.
Artisan::command('htmlparser:import-anticor', function() {
    $url = 'https://www.faufcc.ru/about-us/protivodeystvie-korruptsii/';
    $pageUrl = '/o-tsentre/protivodeistvie-korruptsii';
    $page = \App\Models\Pages\Page::byUrl($pageUrl);
    // The import service requires a Registry instance, so stop with a readable
    // message instead of failing on a missing page or a page without a registry.
    $registry = $page->registry ?? null;
    if (!$registry) {
        $this->error("No registry found for page {$pageUrl}; nothing was imported.");
        return;
    }
    $service = new \App\Services\Registries\AnticorImportService($registry, $url);
    $service->import();
})->describe('Imports anti-corruption documents from faufcc.ru into the page registry');