From 232c19ad1b4ec571045db0b8633bbd72e0d455b8 Mon Sep 17 00:00:00 2001 From: Dmitry Kozlyuk Date: Tue, 6 Sep 2022 23:31:19 +0300 Subject: [PATCH 1/2] Fix rendering of binary files An attempt was always made to read the file to be rendered as UTF-8 text. The encoding was determined heuristically, regardless of the file type. Sometimes binary files were not recognized as binary. Renderers of binary formats were then fed corrupted data: a binary stream treated as text and encoded in UTF-8. Only apply the heuristics to textual formats; read other formats as-is. Fixes: b01dce2a6e98 ("Allow render HTML with css/js external links (#19017)") Cc: xiaolunwen@gmail.com Signed-off-by: Dmitry Kozlyuk --- routers/web/repo/render.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/routers/web/repo/render.go b/routers/web/repo/render.go index 28a6d2f4293cc..d21134c1f97d8 100644 --- a/routers/web/repo/render.go +++ b/routers/web/repo/render.go @@ -44,7 +44,10 @@ func RenderFile(ctx *context.Context) { st := typesniffer.DetectContentType(buf) isTextFile := st.IsText() - rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc)) + rd := io.MultiReader(bytes.NewReader(buf), dataRc) + if isTextFile { + rd = charset.ToUTF8WithFallbackReader(rd) + } if markupType := markup.Type(blob.Name()); markupType == "" { if isTextFile { From 820d6402a14e1cfb8884d81c637cef3ef11ffdab Mon Sep 17 00:00:00 2001 From: Dmitry Kozlyuk Date: Wed, 7 Sep 2022 00:02:53 +0300 Subject: [PATCH 2/2] Support external renderers to PDF (#17635) For some formats, conversion to HTML is unavailable or lossy, while high-quality conversion or printing to PDF is available. Examples include Office PPTX slides and TeX papers. Add a new option value markup.XXX.RENDER_CONTENT_MODE = "pdf". It requires markup.XXX.NEED_POSTPROCESS = false. In this mode, the external renderer is only invoked for the standalone page. Embedded rendering outputs a PDF.js widget with a link to that page. 
Signed-off-by: Dmitry Kozlyuk --- .../doc/advanced/external-renderers.en-us.md | 29 ++++++++++++++ modules/markup/external/external.go | 8 +++- modules/markup/renderer.go | 40 ++++++++++++++----- modules/setting/markup.go | 15 +++++-- routers/web/repo/view.go | 7 ++-- 5 files changed, 82 insertions(+), 17 deletions(-) diff --git a/docs/content/doc/advanced/external-renderers.en-us.md b/docs/content/doc/advanced/external-renderers.en-us.md index 4e5e72554d9d3..8f60387466661 100644 --- a/docs/content/doc/advanced/external-renderers.en-us.md +++ b/docs/content/doc/advanced/external-renderers.en-us.md @@ -158,6 +158,35 @@ RENDER_COMMAND = "jupyter-nbconvert --stdin --stdout --to html --template basic" ALLOW_DATA_URI_IMAGES = true ``` +### Example: Office PPTX + +Convert Office PPTX files to PDF using +[LibreOffice CLI](https://help.libreoffice.org/latest/en-US/text/shared/guide/start_parameters.html): + +```ini +[markup.pptx] +ENABLED = true +FILE_EXTENSIONS = .pptx +IS_INPUT_FILE = true +RENDER_COMMAND = ./convert-pptx.sh +RENDER_CONTENT_MODE = pdf +``` + +The script `convert-pptx.sh`: + +```sh +#!/usr/bin/env sh +set -eu +file="$1" +dir=`mktemp -d` +libreoffice --convert-to pdf "$file" --outdir "$dir" +cat "$dir/$(basename $file .pptx).pdf" +rm -rf "$dir" +``` + +Using `RENDER_CONTENT_MODE = pdf` makes Gitea embed files into a PDF viewer. +It is mutually exclusive with post-processing and sanitization. + ## Customizing CSS The external renderer is specified in the .ini in the format `[markup.XXXXX]` and the HTML supplied by your external renderer will be wrapped in a `<div>
` with classes `markup` and `XXXXX`. The `markup` class provides out of the box styling (as does `markdown` if `XXXXX` is `markdown`). Otherwise you can use these classes to specifically target the contents of your rendered HTML. diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go index 23dd45ba0a1f2..9b6373a999ae6 100644 --- a/modules/markup/external/external.go +++ b/modules/markup/external/external.go @@ -61,7 +61,9 @@ func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule { // SanitizerDisabled disabled sanitize if return true func (p *Renderer) SanitizerDisabled() bool { - return p.RenderContentMode == setting.RenderContentModeNoSanitizer || p.RenderContentMode == setting.RenderContentModeIframe + return p.RenderContentMode == setting.RenderContentModeNoSanitizer || + p.RenderContentMode == setting.RenderContentModeIframe || + p.RenderContentMode == setting.RenderContentModePDF } // DisplayInIFrame represents whether render the content with an iframe @@ -69,6 +71,10 @@ func (p *Renderer) DisplayInIFrame() bool { return p.RenderContentMode == setting.RenderContentModeIframe } +func (p *Renderer) DisplayAsPDF() bool { + return p.RenderContentMode == setting.RenderContentModePDF +} + func envMark(envName string) string { if runtime.GOOS == "windows" { return "%" + envName + "%" diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go index 5f69dc72354f0..4815e12c5ddf1 100644 --- a/modules/markup/renderer.go +++ b/modules/markup/renderer.go @@ -106,6 +106,9 @@ type ExternalRenderer interface { // DisplayInIFrame represents whether render the content with an iframe DisplayInIFrame() bool + + // DisplayAsPDF represents whether the renderer output should be viewed as PDF. 
+ DisplayAsPDF() bool } // RendererContentDetector detects if the content can be rendered @@ -177,23 +180,38 @@ type nopCloser struct { func (nopCloser) Close() error { return nil } +func getRenderURL(ctx *RenderContext) string { + return fmt.Sprintf("%s/%s/%s/render/%s/%s", + setting.AppSubURL, + url.PathEscape(ctx.Metas["user"]), + url.PathEscape(ctx.Metas["repo"]), + ctx.Metas["BranchNameSubURL"], + url.PathEscape(ctx.RelativePath), + ) +} + func renderIFrame(ctx *RenderContext, output io.Writer) error { // set height="0" ahead, otherwise the scrollHeight would be max(150, realHeight) // at the moment, only "allow-scripts" is allowed for sandbox mode. // "allow-same-origin" should never be used, it leads to XSS attack, and it makes the JS in iframe can access parent window's config and CSRF token // TODO: when using dark theme, if the rendered content doesn't have proper style, the default text color is black, which is not easy to read _, err := io.WriteString(output, fmt.Sprintf(` -`, - setting.AppSubURL, - url.PathEscape(ctx.Metas["user"]), - url.PathEscape(ctx.Metas["repo"]), - ctx.Metas["BranchNameSubURL"], - url.PathEscape(ctx.RelativePath), + getRenderURL(ctx), + )) + return err +} + +func renderPDFViewer(ctx *RenderContext, output io.Writer) error { + _, err := io.WriteString(output, fmt.Sprintf(` +`, + setting.StaticURLPrefix+"/assets", + getRenderURL(ctx), )) return err } @@ -281,11 +299,13 @@ func (err ErrUnsupportedRenderExtension) Error() string { func renderFile(ctx *RenderContext, input io.Reader, output io.Writer) error { extension := strings.ToLower(filepath.Ext(ctx.RelativePath)) if renderer, ok := extRenderers[extension]; ok { - if r, ok := renderer.(ExternalRenderer); ok && r.DisplayInIFrame() { - if !ctx.InStandalonePage { - // for an external render, it could only output its content in a standalone page - // otherwise, a