Skip to content

Support external renderers to PDF (#17635) #21098

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions docs/content/doc/advanced/external-renderers.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,35 @@ RENDER_COMMAND = "jupyter-nbconvert --stdin --stdout --to html --template basic"
ALLOW_DATA_URI_IMAGES = true
```

### Example: Office PPTX

Convert Office PPTX files to PDF using
[LibreOffice CLI](https://help.libreoffice.org/latest/en-US/text/shared/guide/start_parameters.html):

```ini
[markup.pptx]
ENABLED = true
FILE_EXTENSIONS = .pptx
IS_INPUT_FILE = true
RENDER_COMMAND = ./convert-pptx.sh
RENDER_CONTENT_MODE = pdf
```

The script `convert-pptx.sh`:

```sh
#!/usr/bin/env sh
# Convert the PPTX file given as the first argument to PDF and write the PDF to stdout.
set -eu
file="$1"
dir="$(mktemp -d)"
# Remove the temporary directory on exit, including when a command fails under `set -e`.
trap 'rm -rf "$dir"' EXIT
# Discard the converter's own stdout so that only the PDF bytes reach stdout.
libreoffice --convert-to pdf --outdir "$dir" "$file" >/dev/null
# Quote "$file" so that paths containing spaces or glob characters are handled correctly.
cat "$dir/$(basename "$file" .pptx).pdf"
```

Setting `RENDER_CONTENT_MODE = pdf` makes Gitea embed the renderer's output in a PDF viewer.
This mode is mutually exclusive with post-processing and sanitization.

## Customizing CSS

The external renderer is specified in the .ini in the format `[markup.XXXXX]` and the HTML supplied by your external renderer will be wrapped in a `<div>` with classes `markup` and `XXXXX`. The `markup` class provides out of the box styling (as does `markdown` if `XXXXX` is `markdown`). Otherwise you can use these classes to specifically target the contents of your rendered HTML.
Expand Down
8 changes: 7 additions & 1 deletion modules/markup/external/external.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,20 @@ func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule {

// SanitizerDisabled returns true if sanitization is disabled for this renderer
func (p *Renderer) SanitizerDisabled() bool {
return p.RenderContentMode == setting.RenderContentModeNoSanitizer || p.RenderContentMode == setting.RenderContentModeIframe
return p.RenderContentMode == setting.RenderContentModeNoSanitizer ||
p.RenderContentMode == setting.RenderContentModeIframe ||
p.RenderContentMode == setting.RenderContentModePDF
}

// DisplayInIFrame reports whether the rendered content should be displayed
// inside an iframe, i.e. whether the renderer's content mode is the iframe mode.
func (p *Renderer) DisplayInIFrame() bool {
	mode := p.RenderContentMode
	return mode == setting.RenderContentModeIframe
}

// DisplayAsPDF reports whether the renderer output should be viewed as a PDF,
// i.e. whether the renderer's content mode is the PDF mode.
func (p *Renderer) DisplayAsPDF() bool {
	mode := p.RenderContentMode
	return mode == setting.RenderContentModePDF
}

func envMark(envName string) string {
if runtime.GOOS == "windows" {
return "%" + envName + "%"
Expand Down
40 changes: 30 additions & 10 deletions modules/markup/renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ type ExternalRenderer interface {

// DisplayInIFrame represents whether render the content with an iframe
DisplayInIFrame() bool

// DisplayAsPDF represents whether the renderer output should be viewed as PDF.
DisplayAsPDF() bool
}

// RendererContentDetector detects if the content can be rendered
Expand Down Expand Up @@ -177,23 +180,38 @@ type nopCloser struct {

func (nopCloser) Close() error { return nil }

// getRenderURL builds the "render" URL for the file described by ctx:
// <AppSubURL>/<user>/<repo>/render/<BranchNameSubURL>/<RelativePath>.
// The user, repo and relative path are path-escaped; the BranchNameSubURL
// meta value is inserted as-is.
func getRenderURL(ctx *RenderContext) string {
	owner := url.PathEscape(ctx.Metas["user"])
	repo := url.PathEscape(ctx.Metas["repo"])
	ref := ctx.Metas["BranchNameSubURL"]
	file := url.PathEscape(ctx.RelativePath)

	return fmt.Sprintf("%s/%s/%s/render/%s/%s", setting.AppSubURL, owner, repo, ref, file)
}

func renderIFrame(ctx *RenderContext, output io.Writer) error {
// set height="0" ahead, otherwise the scrollHeight would be max(150, realHeight)
// at the moment, only "allow-scripts" is allowed for sandbox mode.
// "allow-same-origin" should never be used, it leads to XSS attack, and it makes the JS in iframe can access parent window's config and CSRF token
// TODO: when using dark theme, if the rendered content doesn't have proper style, the default text color is black, which is not easy to read
_, err := io.WriteString(output, fmt.Sprintf(`
<iframe src="%s/%s/%s/render/%s/%s"
<iframe src="%s"
name="giteaExternalRender"
onload="this.height=giteaExternalRender.document.documentElement.scrollHeight"
width="100%%" height="0" scrolling="no" frameborder="0" style="overflow: hidden"
sandbox="allow-scripts"
></iframe>`,
setting.AppSubURL,
url.PathEscape(ctx.Metas["user"]),
url.PathEscape(ctx.Metas["repo"]),
ctx.Metas["BranchNameSubURL"],
url.PathEscape(ctx.RelativePath),
getRenderURL(ctx),
))
return err
}

// renderPDFViewer writes an <iframe> to output that loads the PDF.js viewer
// page from the static assets prefix and passes it the render URL of the
// current file through the "file" query parameter.
//
// The render URL is query-escaped because it is embedded as a query value:
// characters such as '&', '+' or '=' (which url.PathEscape leaves untouched)
// and existing percent-escapes would otherwise be misinterpreted when the
// query string is parsed.
//
// It returns any error reported while writing to output.
func renderPDFViewer(ctx *RenderContext, output io.Writer) error {
	_, err := fmt.Fprintf(output, `
<iframe width="100%%" height="600px" src="%s/vendor/plugins/pdfjs/web/viewer.html?file=%s"></iframe>`,
		setting.StaticURLPrefix+"/assets",
		url.QueryEscape(getRenderURL(ctx)),
	)
	return err
}
Expand Down Expand Up @@ -281,11 +299,13 @@ func (err ErrUnsupportedRenderExtension) Error() string {
func renderFile(ctx *RenderContext, input io.Reader, output io.Writer) error {
extension := strings.ToLower(filepath.Ext(ctx.RelativePath))
if renderer, ok := extRenderers[extension]; ok {
if r, ok := renderer.(ExternalRenderer); ok && r.DisplayInIFrame() {
if !ctx.InStandalonePage {
// for an external render, it could only output its content in a standalone page
// otherwise, a <iframe> should be outputted to embed the external rendered page
if r, ok := renderer.(ExternalRenderer); ok && !ctx.InStandalonePage {
// for an external render, it could only output its content in a standalone page
// otherwise, the output may need to be embedded into an <iframe> or a viewer
if r.DisplayInIFrame() {
return renderIFrame(ctx, output)
} else if r.DisplayAsPDF() {
return renderPDFViewer(ctx, output)
}
}
return render(ctx, renderer, input, output)
Expand Down
15 changes: 12 additions & 3 deletions modules/setting/markup.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"strings"

"code.gitea.io/gitea/modules/log"

"gopkg.in/ini.v1"
)

Expand All @@ -24,6 +23,7 @@ const (
RenderContentModeSanitized = "sanitized"
RenderContentModeNoSanitizer = "no-sanitizer"
RenderContentModeIframe = "iframe"
RenderContentModePDF = "pdf"
)

// MarkupRenderer defines the external parser configured in ini
Expand Down Expand Up @@ -160,18 +160,27 @@ func newMarkupRenderer(name string, sec *ini.Section) {
}
if renderContentMode != RenderContentModeSanitized &&
renderContentMode != RenderContentModeNoSanitizer &&
renderContentMode != RenderContentModeIframe {
renderContentMode != RenderContentModeIframe &&
renderContentMode != RenderContentModePDF {
log.Error("invalid RENDER_CONTENT_MODE: %q, default to %q", renderContentMode, RenderContentModeSanitized)
renderContentMode = RenderContentModeSanitized
}

needPostProcessDefault := renderContentMode != RenderContentModePDF
needPostProcess := sec.Key("NEED_POSTPROCESS").MustBool(needPostProcessDefault)
if needPostProcess && renderContentMode == RenderContentModePDF {
log.Error("NEED_POSTPROCESS: %q is incompatible with RENDER_CONTENT_MODE: %q, default to %q",
needPostProcess, renderContentMode, false)
needPostProcess = false
}

ExternalMarkupRenderers = append(ExternalMarkupRenderers, &MarkupRenderer{
Enabled: sec.Key("ENABLED").MustBool(false),
MarkupName: name,
FileExtensions: exts,
Command: command,
IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false),
NeedPostProcess: sec.Key("NEED_POSTPROCESS").MustBool(true),
NeedPostProcess: needPostProcess,
RenderContentMode: renderContentMode,
})
}
5 changes: 4 additions & 1 deletion routers/web/repo/render.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ func RenderFile(ctx *context.Context) {
st := typesniffer.DetectContentType(buf)
isTextFile := st.IsText()

rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc))
rd := io.MultiReader(bytes.NewReader(buf), dataRc)
if isTextFile {
rd = charset.ToUTF8WithFallbackReader(rd)
}

if markupType := markup.Type(blob.Name()); markupType == "" {
if isTextFile {
Expand Down
7 changes: 4 additions & 3 deletions routers/web/repo/view.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,9 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
ctx.Data["EditFileTooltip"] = ctx.Tr("repo.editor.cannot_edit_non_text_files")
}

metas := ctx.Repo.Repository.ComposeDocumentMetas()
metas["BranchNameSubURL"] = ctx.Repo.BranchNameSubURL()

switch {
case isRepresentableAsText:
if st.IsSvgImage() {
Expand Down Expand Up @@ -498,8 +501,6 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
if !detected {
markupType = ""
}
metas := ctx.Repo.Repository.ComposeDocumentMetas()
metas["BranchNameSubURL"] = ctx.Repo.BranchNameSubURL()
ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{
Ctx: ctx,
Type: markupType,
Expand Down Expand Up @@ -613,7 +614,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
Ctx: ctx,
RelativePath: ctx.Repo.TreePath,
URLPrefix: path.Dir(treeLink),
Metas: ctx.Repo.Repository.ComposeDocumentMetas(),
Metas: metas,
GitRepo: ctx.Repo.GitRepo,
}, rd)
if err != nil {
Expand Down