embedded_image_directive.py 7.39 KB
Newer Older
1 2
# -*- coding: utf-8 -*-

3
import base64
4
import requests
5 6 7 8
import os
import shutil
import subprocess
import tempfile
9
import urlparse
10
import sys
11
from io import BytesIO
12 13 14 15 16 17
from PIL import Image
from docutils import nodes
from docutils.parsers.rst import Directive
from docutils.parsers.rst import directives, states
from docutils.nodes import fully_normalize_name, whitespace_normalize_name
from docutils.parsers.rst.roles import set_classes
18 19 20 21
from .output_mode import get_output_mode


# File name patterns used by ``load_and_convert_image``. ``{i:04d}`` is filled
# with the running image number, ``{ext}`` with the source file extension.
TMP_IMAGE_FILENAME = "_downloaded_image{i:04d}.{ext}"  # downloaded remote image
TMP_QPDF_OUT_FILENAME = "_qpdf_out.pdf"  # scratch output of the qpdf pass
OUT_PDF_FILENAME = "_converted_image{i:04d}.pdf"  # result of SVG -> PDF conversion
OUT_PNG_FILENAME = "_converted_image{i:04d}.png"  # result of GIF -> PNG conversion
25 26


27 28 29 30
def is_url(url):
    """Return True when *url* names a remote resource rather than a local file.

    A string is remote when it parses with a URL scheme other than ``file``.
    Strings without a scheme (plain relative or absolute paths) are local.
    A one-letter "scheme" is a Windows drive letter (``C:/images/a.png``), not
    a real URL scheme, so such paths are treated as local too.
    """
    scheme = urlparse.urlparse(url).scheme
    return len(scheme) > 1 and scheme != "file"


31 32 33
def _run_quietly(command):
    """Run *command* with stdout/stderr discarded; raise on a non-zero exit status."""
    with open(os.devnull, "w") as devnull:
        subprocess.check_call(command, stdout=devnull, stderr=devnull)


def _svg_to_pdf(svg_filepath, pdf_filepath):
    """Render *svg_filepath* to *pdf_filepath* with rsvg-convert, then try a qpdf pass."""
    _run_quietly(["rsvg-convert", "-f", "pdf", "-o", pdf_filepath, svg_filepath])
    # Keep the qpdf scratch file next to the PDF: when the PDF lives in the
    # temporary directory the scratch file is cleaned up together with it.
    qpdf_filepath = os.path.join(os.path.dirname(pdf_filepath), TMP_QPDF_OUT_FILENAME)
    try:
        _run_quietly(["qpdf", pdf_filepath, qpdf_filepath])
    except (subprocess.CalledProcessError, OSError):
        # The qpdf pass is best effort: on failure (or when qpdf is not
        # installed) keep the rsvg-convert output and drop any partial result.
        try:
            os.remove(qpdf_filepath)
        except OSError:
            pass  # qpdf did not leave an output file behind
    else:
        os.remove(pdf_filepath)
        shutil.move(qpdf_filepath, pdf_filepath)


def load_and_convert_image(
    image_filepath_or_url, convert_to_base64=True, convert_svg_to_pdf=False, convert_gif_to_png=False
):
    """Load an image from a path or URL, optionally convert it, and return a reference to it.

    Remote images are downloaded with ``requests``. SVG images can be converted
    to PDF (``rsvg-convert`` plus an optional ``qpdf`` pass) and GIF images to
    PNG (``convert``). A counter stored on the function object
    (``load_and_convert_image.image_number``) numbers the generated files and
    is incremented after every successful call.

    :param image_filepath_or_url: local file path or remote URL of the image.
    :param convert_to_base64: if true, the returned reference is a
        ``data:image/...;base64,`` URI and intermediate files are created in a
        temporary directory that is removed again; otherwise the reference is
        a file path (downloaded or converted files get relative file names).
    :param convert_svg_to_pdf: convert ``.svg`` input to PDF.
    :param convert_gif_to_png: convert ``.gif`` input to PNG.
    :returns: ``(image, image_reference)`` where ``image`` is the PIL image, or
        ``None`` if PIL could not read the data (for example SVG or PDF).
    :raises: whatever ``requests`` raises for a failed download,
        ``subprocess.CalledProcessError`` if ``rsvg-convert`` or ``convert``
        fails, and ``IOError`` if the image file cannot be read or written.
    """
    self = load_and_convert_image
    if not hasattr(self, "image_number"):
        self.image_number = 1
    image = None
    image_content = None
    image_filepath = None
    image_is_pdf = False
    image_is_remote = is_url(image_filepath_or_url)

    # Detect the type from the URL *path* when it carries an extension, so
    # that a query string or fragment ("a.svg?raw=1") does not hide it. URLs
    # whose path has no extension fall back to the whole string.
    type_hint = image_filepath_or_url
    if image_is_remote:
        url_path = urlparse.urlparse(image_filepath_or_url).path
        if os.path.splitext(url_path)[1]:
            type_hint = url_path
    image_is_svg = type_hint.lower().endswith(".svg")
    image_is_gif = type_hint.lower().endswith(".gif")
    image_will_be_converted = (image_is_svg and convert_svg_to_pdf) or (image_is_gif and convert_gif_to_png)

    tmp_dir = None
    if image_will_be_converted and (image_is_remote or convert_to_base64):
        tmp_dir = tempfile.mkdtemp()
    try:
        # The download happens inside the try block so that the temporary
        # directory is removed even when the request fails.
        if image_is_remote:
            image_url = image_filepath_or_url
            response = requests.get(image_url)
            response.raise_for_status()
            image_content = response.content
            if not convert_to_base64 or image_will_be_converted:
                image_filepath = TMP_IMAGE_FILENAME.format(
                    i=self.image_number, ext=os.path.splitext(type_hint)[1][1:]
                )
                if tmp_dir is not None:
                    image_filepath = os.path.join(tmp_dir, image_filepath)
                with open(image_filepath.encode(sys.getfilesystemencoding()), "wb") as image_file:
                    image_file.write(image_content)
        else:
            image_filepath = image_filepath_or_url

        if image_will_be_converted:
            if image_is_svg:
                image_content = None  # force a re-read of the converted file below
                out_pdf_filename = OUT_PDF_FILENAME.format(i=self.image_number)
                if convert_to_base64:
                    out_pdf_filename = os.path.join(tmp_dir, out_pdf_filename)
                _svg_to_pdf(image_filepath, out_pdf_filename)
                image_filepath = out_pdf_filename
                image_is_svg = False
                image_is_pdf = True
            elif image_is_gif:
                image_content = None  # force a re-read of the converted file below
                out_png_filename = OUT_PNG_FILENAME.format(i=self.image_number)
                if convert_to_base64:
                    out_png_filename = os.path.join(tmp_dir, out_png_filename)
                _run_quietly(["convert", image_filepath, out_png_filename])
                image_filepath = out_png_filename

        if image_content is None:
            with open(image_filepath.encode(sys.getfilesystemencoding()), "rb") as image_file:
                image_content = image_file.read()
    finally:
        if tmp_dir is not None:
            shutil.rmtree(tmp_dir)

    try:
        image = Image.open(BytesIO(image_content))
        image_format = image.format.lower()
    except IOError:
        # PIL could not identify the data; fall back to what is known from the
        # file name or the conversion that was performed.
        if image_is_svg:
            image_format = "svg+xml"
        elif image_is_pdf:
            image_format = "pdf"
        else:
            image_format = "unknown"
    if convert_to_base64:
        image_reference = "data:image/{format};base64,".format(format=image_format) + base64.b64encode(image_content)
    else:
        image_reference = image_filepath
    self.image_number += 1
    return image, image_reference
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146


class EmbeddedImage(Directive):
    """Directive that loads an image through ``load_and_convert_image`` and emits an image node.

    In ``"html"`` output mode the image is requested as a base64 data URI; in
    every other mode SVG -> PDF and GIF -> PNG conversion is requested instead
    and the image is referenced by file path.
    """

    align_values = ("left", "center", "right")

    required_arguments = 1
    optional_arguments = 0
    option_spec = {
        "alt": directives.unchanged,
        "height": directives.length_or_unitless,
        "width": directives.length_or_percentage_or_unitless,
        "scale": directives.percentage,
        "align": lambda argument: directives.choice(argument, EmbeddedImage.align_values),
        "name": directives.unchanged,
        "target": directives.unchanged_required,
        "class": directives.class_option,
    }
    has_content = True

    def run(self):
        """Build the image node, wrapped in a reference node when ``target`` is given.

        Returns a list holding any system messages produced while parsing the
        ``target`` option, followed by the resulting node.
        """
        options = self.options
        source = self.arguments[0]

        if "align" in options and options["align"] not in self.align_values:
            raise self.error(
                'Error in "%s" directive: "%s" is not a valid value for '
                'the "align" option.  Valid values for "align" are: "%s".'
                % (self.name, options["align"], '", "'.join(self.align_values))
            )

        html_output = get_output_mode() == "html"
        messages = []
        image, image_reference = load_and_convert_image(
            source,
            convert_to_base64=html_output,
            convert_svg_to_pdf=not html_output,
            convert_gif_to_png=not html_output,
        )
        options["uri"] = directives.uri(image_reference)

        # Without an explicit size option, default to the pixel width of the
        # loaded image (only available when the image could be opened).
        has_explicit_size = any(key in options for key in ("height", "width", "scale"))
        if image is not None and not has_explicit_size:
            options["width"] = "{}px".format(image.size[0])

        reference_node = None
        if "target" in options:
            target_lines = states.escape2null(options["target"]).splitlines()
            target_type, data = self.state.parse_target(target_lines, self.block_text, self.lineno)
            if target_type == "refuri":
                reference_node = nodes.reference(refuri=data)
            elif target_type == "refname":
                reference_node = nodes.reference(
                    refname=fully_normalize_name(data), name=whitespace_normalize_name(data)
                )
                reference_node.indirect_reference_name = data
                self.state.document.note_refname(reference_node)
            else:
                # Malformed target: ``data`` is a system message.
                messages.append(data)
            # "target" is removed before the options are passed to the image node.
            del options["target"]

        set_classes(options)
        image_node = nodes.image(self.block_text, **options)
        self.add_name(image_node)
        if not reference_node:
            return messages + [image_node]
        reference_node += image_node
        return messages + [reference_node]