import re

# Matches a backslash line continuation: optional whitespace, a backslash, optional
# whitespace and the newline that ends the line.
LINE_CONTINUATION_REGEX = re.compile(r"(\s)*\\(\s)*\n")

# Matches an instruction line: group 1 is the alphabetic instruction name and group 2 is
# everything after the single whitespace character that follows it.
# Written as a raw string so that `\s` reaches the regex engine rather than being treated
# as an (invalid) string escape sequence.
COMMAND_REGEX = re.compile(r"([A-Za-z]+)\s(.*)")

# Character that begins a comment.
COMMENT_CHARACTER = "#"

# Tag assumed when an image identifier does not specify one.
LATEST_TAG = "latest"


class ParsedDockerfile(object):
    """
    Holds the commands parsed from a Dockerfile and exposes its base image information.

    `commands` is a list of dicts, each with a "command" key (the instruction name) and a
    "parameters" key (the remainder of the instruction line).
    """

    def __init__(self, commands):
        self.commands = commands

    def _get_commands_of_kind(self, kind):
        """
        Returns all commands whose "command" entry equals `kind`, in their original order.
        """
        return [command for command in self.commands if command["command"] == kind]

    def _get_from_image_identifier(self):
        """
        Returns the image reference of the last FROM command, or None if there is no FROM
        command at all.
        """
        from_commands = self._get_commands_of_kind("FROM")
        if not from_commands:
            return None

        parameters = from_commands[-1]["parameters"]

        # The full syntax is `FROM [--platform=<platform>] <image> [AS <name>]`, so the image
        # reference is the first whitespace-separated token that is not a flag. Without this,
        # a stage alias would end up inside the tag (e.g. "14.04 AS builder").
        for token in parameters.split():
            if not token.startswith("--"):
                return token

        # No usable token was found; hand back the raw parameters unchanged.
        return parameters

    @staticmethod
    def parse_image_identifier(image_identifier):
        """
        Parses a docker image identifier, and returns a tuple of image name and tag, where the tag
        is filled in with "latest" if left unspecified.

        Surrounding whitespace is ignored in every returned value.
        """
        # Note:
        # Dockerfile image references can be of multiple forms:
        #   server:port/some/path
        #   somepath
        #   server/some/path
        #   server/some/path:tag
        #   server:port/some/path:tag

        # Strip once up front so that every branch below returns a whitespace-free name.
        image_identifier = image_identifier.strip()
        parts = image_identifier.split(":")

        if len(parts) == 1:
            # somepath
            return (parts[0], LATEST_TAG)

        # Otherwise, determine if the last part is a port
        # or a tag.
        if "/" in parts[-1]:
            # Last part is `port/some/path`, i.e. still part of the image name: no tag given.
            return (image_identifier, LATEST_TAG)

        # Remaining cases:
        #   server/some/path:tag
        #   server:port/some/path:tag
        return (":".join(parts[:-1]), parts[-1])

    def get_base_image(self):
        """
        Return the base image without the tag name, or None if there is no FROM command.
        """
        return self.get_image_and_tag()[0]

    def get_image_and_tag(self):
        """
        Returns the image and tag from the last FROM line of the dockerfile, or (None, None)
        if the dockerfile has no FROM line.
        """
        image_identifier = self._get_from_image_identifier()
        if image_identifier is None:
            return (None, None)

        return self.parse_image_identifier(image_identifier)


def strip_comments(contents):
    """
    Returns `contents` with everything from the first comment character to the end of the
    line removed, on every line. Lines without a comment character are left untouched and
    the number of lines is preserved.
    """
    # Splitting at most once on the comment character and keeping the head yields the text
    # before the first occurrence, or the whole line when there is none.
    return "\n".join(
        raw_line.split(COMMENT_CHARACTER, 1)[0] for raw_line in contents.split("\n")
    )


def join_continued_lines(contents):
    """
    Returns `contents` with every line continuation removed, so that an instruction which
    was split across several lines ends up on a single line.
    """
    # subn returns (new_string, replacement_count); only the string is of interest here.
    joined, _ = LINE_CONTINUATION_REGEX.subn("", contents)
    return joined


def parse_dockerfile(contents):
    """
    Parses the contents of a Dockerfile and returns a ParsedDockerfile.

    `contents` may be text or UTF-8 encoded bytes. Comments are stripped and continued lines
    are joined first; each remaining non-empty line is then matched as an instruction name
    followed by its parameters. Lines which do not match are skipped. Instruction names are
    upper-cased in the resulting commands.
    """
    # Only byte strings need decoding. Text has no `decode` method on Python 3, so calling it
    # unconditionally would raise AttributeError there.
    if isinstance(contents, bytes):
        # Invalid byte sequences are replaced rather than raised on, so that parsing stays
        # best-effort and the text-based steps below never receive raw bytes.
        contents = contents.decode("utf-8", "replace")

    contents = join_continued_lines(strip_comments(contents))

    commands = []
    for line in contents.split("\n"):
        if not line:
            continue

        match_command = COMMAND_REGEX.match(line.strip())
        if match_command is None:
            # Not of the form `<instruction> <parameters>`; ignore.
            continue

        commands.append(
            {
                "command": match_command.group(1).upper(),
                "parameters": match_command.group(2),
            }
        )

    return ParsedDockerfile(commands)