1
0
mirror of https://github.com/BookStackApp/BookStack.git synced 2025-07-28 17:02:04 +03:00

Updated markdown export implementation

- Removed ZIP system for now, until the idea can be fleshed out.
- Added tests to cover the updated export.
- Upgraded the library used for the conversion.
- Added custom handling for BookStack callouts.
- Added HTML cleanup to better produce output for things like code
  blocks.
This commit is contained in:
Dan Brown
2021-06-22 21:02:18 +01:00
parent 9af636bd48
commit 57ea2e92ec
10 changed files with 344 additions and 112 deletions

View File

@ -5,6 +5,7 @@ use Illuminate\Support\Collection;
/**
* Class Chapter
* @property Collection<Page> $pages
* @property mixed $description
*/
class Chapter extends BookChild
{

View File

@ -3,13 +3,12 @@
use BookStack\Entities\Models\Book;
use BookStack\Entities\Models\Chapter;
use BookStack\Entities\Models\Page;
use BookStack\Entities\Tools\Markdown\HtmlToMarkdown;
use BookStack\Uploads\ImageService;
use DomPDF;
use Exception;
use SnappyPDF;
use League\HTMLToMarkdown\HtmlConverter;
use Throwable;
use ZipArchive;
class ExportFormatter
{
@ -231,23 +230,20 @@ class ExportFormatter
/**
* Convert a page to a Markdown file.
* @throws Throwable
*/
public function pageToMarkdown(Page $page)
public function pageToMarkdown(Page $page): string
{
if (property_exists($page, 'markdown') && $page->markdown != '') {
if ($page->markdown) {
return "# " . $page->name . "\n\n" . $page->markdown;
} else {
$converter = new HtmlConverter();
return "# " . $page->name . "\n\n" . $converter->convert($page->html);
}
return "# " . $page->name . "\n\n" . (new HtmlToMarkdown($page->html))->convert();
}
/**
* Convert a chapter to a Markdown file.
* @throws Throwable
*/
public function chapterToMarkdown(Chapter $chapter)
public function chapterToMarkdown(Chapter $chapter): string
{
$text = "# " . $chapter->name . "\n\n";
$text .= $chapter->description . "\n\n";
@ -265,7 +261,7 @@ class ExportFormatter
$bookTree = (new BookContents($book))->getTree(false, true);
$text = "# " . $book->name . "\n\n";
foreach ($bookTree as $bookChild) {
if ($bookChild->isA('chapter')) {
if ($bookChild instanceof Chapter) {
$text .= $this->chapterToMarkdown($bookChild);
} else {
$text .= $this->pageToMarkdown($bookChild);
@ -273,27 +269,4 @@ class ExportFormatter
}
return $text;
}
/**
* Convert a book into a zip file.
*/
public function bookToZip(Book $book): string
{
// TODO: Is not unlinking the file a security risk?
$z = new ZipArchive();
$z->open("book.zip", \ZipArchive::CREATE | \ZipArchive::OVERWRITE);
$bookTree = (new BookContents($book))->getTree(false, true);
foreach ($bookTree as $bookChild) {
if ($bookChild->isA('chapter')) {
$z->addEmptyDir($bookChild->name);
foreach ($bookChild->pages as $page) {
$filename = $bookChild->name . "/" . $page->name . ".md";
$z->addFromString($filename, $this->pageToMarkdown($page));
}
} else {
$z->addFromString($bookChild->name . ".md", $this->pageToMarkdown($bookChild));
}
}
return "book.zip";
}
}

View File

@ -0,0 +1,17 @@
<?php namespace BookStack\Entities\Tools\Markdown;
use League\HTMLToMarkdown\Converter\ParagraphConverter;
use League\HTMLToMarkdown\ElementInterface;
/**
 * Paragraph converter with custom handling for BookStack callouts.
 *
 * Paragraph elements whose class attribute contains "callout" are written out
 * as an HTML element (same tag name, same class) rather than being turned into
 * a Markdown paragraph. All other elements are passed to the parent
 * ParagraphConverter unchanged.
 */
class CustomParagraphConverter extends ParagraphConverter
{
    /**
     * Convert the given element, keeping callouts as HTML.
     *
     * @param ElementInterface $element The element being converted.
     *
     * @return string The HTML element followed by a blank line for callouts,
     *                otherwise whatever the parent converter returns.
     */
    public function convert(ElementInterface $element): string
    {
        $class = $element->getAttribute('class');
        if (strpos($class, 'callout') === false) {
            return parent::convert($element);
        }

        $tag = $element->getTagName();

        // The class value is re-emitted inside a double-quoted HTML attribute,
        // so it has to be escaped; otherwise a value containing quotes or angle
        // brackets would break out of the attribute in the exported document.
        $safeClass = htmlspecialchars($class, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        // NOTE(review): getValue() is used as-is for the element content, as in
        // the original implementation — confirm it is already in its final form.
        return "<{$tag} class=\"{$safeClass}\">{$element->getValue()}</{$tag}>\n\n";
    }
}

View File

@ -0,0 +1,76 @@
<?php namespace BookStack\Entities\Tools\Markdown;
use League\HTMLToMarkdown\Converter\BlockquoteConverter;
use League\HTMLToMarkdown\Converter\CodeConverter;
use League\HTMLToMarkdown\Converter\CommentConverter;
use League\HTMLToMarkdown\Converter\DivConverter;
use League\HTMLToMarkdown\Converter\EmphasisConverter;
use League\HTMLToMarkdown\Converter\HardBreakConverter;
use League\HTMLToMarkdown\Converter\HeaderConverter;
use League\HTMLToMarkdown\Converter\HorizontalRuleConverter;
use League\HTMLToMarkdown\Converter\ImageConverter;
use League\HTMLToMarkdown\Converter\LinkConverter;
use League\HTMLToMarkdown\Converter\ListBlockConverter;
use League\HTMLToMarkdown\Converter\ListItemConverter;
use League\HTMLToMarkdown\Converter\PreformattedConverter;
use League\HTMLToMarkdown\Converter\TextConverter;
use League\HTMLToMarkdown\Environment;
use League\HTMLToMarkdown\HtmlConverter;
/**
 * Converts a string of HTML into Markdown.
 *
 * The HTML is cleaned up first (line endings, attributes on pre tags) and then
 * run through an HtmlConverter that uses an explicitly assembled environment,
 * including the CustomParagraphConverter in place of a default paragraph converter.
 */
class HtmlToMarkdown
{
    /**
     * The HTML given at construction time, before any clean-up.
     *
     * @var string
     */
    protected $html;

    /**
     * @param string $html The HTML content to be converted.
     */
    public function __construct(string $html)
    {
        $this->html = $html;
    }

    /**
     * Run the conversion
     *
     * @return string The Markdown produced from the prepared HTML.
     */
    public function convert(): string
    {
        $converter = new HtmlConverter($this->getConverterEnvironment());
        $html = $this->prepareHtml($this->html);

        return $converter->convert($html);
    }

    /**
     * Run any pre-processing to the HTML to clean it up manually before conversion.
     *
     * @param string $html The raw HTML.
     *
     * @return string The HTML with normalised line endings and attribute-free pre tags.
     */
    protected function prepareHtml(string $html): string
    {
        // Carriage returns can cause whitespace issues in output, so both
        // "\r\n" pairs and lone "\r" characters become plain newlines.
        $html = str_replace(["\r\n", "\r"], "\n", $html);

        // Attributes on the pre tag can cause issues with conversion.
        // "\s" accepts any whitespace after the tag name and "[^>]*" also spans
        // newlines, so opening tags split over several lines are stripped too;
        // the "i" flag covers upper-case tag names.
        $cleaned = preg_replace('/<pre\s[^>]*>/i', '<pre>', $html);

        // preg_replace() returns null when PCRE fails; fall back to the
        // un-stripped HTML rather than violating the string return type.
        return $cleaned ?? $html;
    }

    /**
     * Get the HTML to Markdown customized environment.
     * Extends the default provided environment with some BookStack specific tweaks.
     *
     * @return Environment Environment using ATX-style headers with each
     *                     converter below registered in the listed order.
     */
    protected function getConverterEnvironment(): Environment
    {
        $environment = new Environment(['header_style' => 'atx']);

        // Registration order is kept exactly as it was originally written.
        $converters = [
            new BlockquoteConverter(),
            new CodeConverter(),
            new CommentConverter(),
            new DivConverter(),
            new EmphasisConverter(),
            new HardBreakConverter(),
            new HeaderConverter(),
            new HorizontalRuleConverter(),
            new ImageConverter(),
            new LinkConverter(),
            new ListBlockConverter(),
            new ListItemConverter(),
            new CustomParagraphConverter(),
            new PreformattedConverter(),
            new TextConverter(),
        ];

        foreach ($converters as $converter) {
            $environment->addConverter($converter);
        }

        return $environment;
    }
}

View File

@ -59,17 +59,7 @@ class BookExportController extends Controller
public function markdown(string $bookSlug)
{
$book = $this->bookRepo->getBySlug($bookSlug);
$textContent = $this->exportService->bookToMarkdown($book);
$textContent = $this->exportFormatter->bookToMarkdown($book);
return $this->downloadResponse($textContent, $bookSlug . '.md');
}
/**
* Export a book as a zip file, made of markdown files.
*/
public function zip(string $bookSlug)
{
$book = $this->bookRepo->getBySlug($bookSlug);
$filename = $this->exportService->bookToZip($book);
return response()->download($filename);
}
}

View File

@ -63,7 +63,7 @@ class ChapterExportController extends Controller
{
// TODO: This should probably export to a zip file.
$chapter = $this->chapterRepo->getBySlug($bookSlug, $chapterSlug);
$chapterText = $this->exportService->chapterToMarkdown($chapter);
$chapterText = $this->exportFormatter->chapterToMarkdown($chapter);
return $this->downloadResponse($chapterText, $chapterSlug . '.md');
}
}

View File

@ -68,7 +68,7 @@ class PageExportController extends Controller
public function markdown(string $bookSlug, string $pageSlug)
{
$page = $this->pageRepo->getBySlug($bookSlug, $pageSlug);
$pageText = $this->exportService->pageToMarkdown($page);
$pageText = $this->exportFormatter->pageToMarkdown($page);
return $this->downloadResponse($pageText, $pageSlug . '.md');
}
}