import re

# A backslash at the end of a line (optionally surrounded by whitespace)
# together with the newline that follows it.
LINE_CONTINUATION_REGEX = re.compile(r"(\s)*\\(\s)*\n")

# "<letters><one whitespace char><rest of line>"; group 1 is the command name
# and group 2 is its raw parameter text.
COMMAND_REGEX = re.compile(r"([A-Za-z]+)\s(.*)")

COMMENT_CHARACTER = "#"
LATEST_TAG = "latest"


class ParsedDockerfile(object):
    """
    Holds the commands extracted from a Dockerfile.

    `commands` is a list of dicts, each with a "command" key (the upper-cased
    command name) and a "parameters" key (the raw text following the command).
    """

    def __init__(self, commands):
        self.commands = commands

    def _get_commands_of_kind(self, kind):
        """
        Returns every command whose "command" entry equals `kind`, in order.
        """
        return [command for command in self.commands if command["command"] == kind]

    def _get_from_image_identifier(self):
        """
        Returns the parameters of the last FROM command, or None if there is no FROM command.
        """
        from_commands = self._get_commands_of_kind("FROM")
        if not from_commands:
            return None

        # NOTE(review): the parameters are returned verbatim, so anything that
        # follows the image reference on the FROM line is included -- confirm
        # callers only pass plain "FROM <image>" lines.
        return from_commands[-1]["parameters"]

    @staticmethod
    def parse_image_identifier(image_identifier):
        """
        Parses a docker image identifier, and returns a tuple of image name and tag, where the tag
        is filled in with "latest" if left unspecified.
        """
        # Note:
        # Dockerfile images references can be of multiple forms:
        #   server:port/some/path
        #   somepath
        #   server/some/path
        #   server/some/path:tag
        #   server:port/some/path:tag
        parts = image_identifier.strip().split(":")

        if len(parts) == 1:
            # somepath
            return (parts[0], LATEST_TAG)

        # Otherwise, determine if the last part is a port
        # or a tag.
        if "/" in parts[-1]:
            # Last part is part of the hostname, so no tag was given. The
            # identifier is returned exactly as passed in (not stripped).
            return (image_identifier, LATEST_TAG)

        # Remaining cases:
        #   server/some/path:tag
        #   server:port/some/path:tag
        return (":".join(parts[:-1]), parts[-1])

    def get_base_image(self):
        """
        Return the base image without the tag name.
        """
        return self.get_image_and_tag()[0]

    def get_image_and_tag(self):
        """
        Returns the image and tag from the FROM line of the dockerfile.

        Returns (None, None) when the dockerfile has no FROM command.
        """
        image_identifier = self._get_from_image_identifier()
        if image_identifier is None:
            return (None, None)

        return self.parse_image_identifier(image_identifier)


def strip_comments(contents):
    """
    Returns `contents` with everything from the first comment character to the end of the line
    removed on every line.

    The cut is made at the first "#" wherever it appears in the line, including inside quoted
    arguments.
    """
    # NOTE(review): this is more lenient than treating only lines that start
    # with "#" as comments; it looks intentional -- confirm before tightening.
    return "\n".join(line.partition(COMMENT_CHARACTER)[0] for line in contents.split("\n"))


def join_continued_lines(contents):
    """
    Returns `contents` with line continuations removed.

    Each trailing backslash is deleted together with its surrounding whitespace and the newline
    after it; the joined pieces are concatenated with no separator inserted.
    """
    return LINE_CONTINUATION_REGEX.sub("", contents)


def parse_dockerfile(contents):
    """
    Parses the contents of a Dockerfile and returns a ParsedDockerfile.

    `contents` may be text or UTF-8 encoded bytes. Comments are stripped and continued lines are
    joined before the commands are extracted. Lines that do not look like
    "<COMMAND> <parameters>" are skipped, and command names are upper-cased.
    """
    # If we receive bytes, translate into unicode. Text has no usable
    # `decode` on Python 3, so only byte strings are decoded.
    if isinstance(contents, bytes):
        try:
            contents = contents.decode("utf-8")
        except UnicodeDecodeError:
            # Best effort: keep parsing, with undecodable bytes replaced by
            # U+FFFD rather than failing on the first text operation below.
            contents = contents.decode("utf-8", "replace")

    contents = join_continued_lines(strip_comments(contents))
    lines = [line.strip() for line in contents.split("\n") if line]

    commands = []
    for line in lines:
        match_command = COMMAND_REGEX.match(line)
        if match_command:
            command = match_command.group(1).upper()
            parameters = match_command.group(2)

            commands.append({"command": command, "parameters": parameters})

    return ParsedDockerfile(commands)