#!/usr/bin/env python3

# Tool to bundle multiple C/C++ source files, inlining any includes.
#
# Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain)

import argparse
import os
import re
import sys

from pathlib import Path

# Names exported by a star-import. The two regex self-checks are deliberately
# left out: they return a bool and print, so they are meant to be called by
# hand and should not be picked up as test cases by a test collector.
__all__ = [
    'add_file',
    'found',
    'include_regex',
    'log_line',
    'main',
    'pragma_regex',
    'resolve_include',
    'roots',
    'write_line',
]

# File roots when searching (equivalent to -I paths for the compiler). Kept as
# a list, not a set, so the roots are searched in command line order.
roots = []

# File Path objects previously inlined.
found = set()

# Destination file object (or stdout if no output file was supplied).
destn = None

# Whether "#pragma once" lines are copied to the output (the -p option).
keep_pragma = False

# Regex to handle the following type of file includes:
#
#     #include "file"
#       #include "file"
#     #  include "file"
#     #include   "file"
#     #include "file" // comment
#     #include "file" // comment with quote "
#
# And all combinations of, as well as ignoring the following:
#
#     #include <file>
#     //#include "file"
#     /*#include "file"*/
#
# We don't try to catch errors since the compiler will do this (and the code is
# expected to be valid before processing) and we don't care what follows the
# file (whether it's a valid comment or not, since anything after the quoted
# string is ignored)
#
include_regex = re.compile(r'^\s*#\s*include\s*"(.+?)"')

# Simple tests to prove include_regex's cases. Returns True (after printing a
# confirmation) when every case behaves as documented, otherwise False.
#
def test_match_include():
    if (include_regex.match('#include "file"') and
            include_regex.match('  #include "file"') and
            include_regex.match('#  include "file"') and
            include_regex.match('#include   "file"') and
            include_regex.match('#include "file" // comment')):
        if (not include_regex.match('#include <file>') and
                not include_regex.match('//#include "file"') and
                not include_regex.match('/*#include "file"*/')):
            # The non-greedy group must stop at the first closing quote
            matched = include_regex.match('#include "file" // "')
            if (matched and matched.group(1) == 'file'):
                print('#include match valid')
                return True
    return False

# Regex to handle "#pragma once" in various formats:
#
#     #pragma once
#       #pragma once
#     #  pragma once
#     #pragma   once
#     #pragma once // comment
#
# Ignoring commented versions, same as include_regex.
#
pragma_regex = re.compile(r'^\s*#\s*pragma\s*once\s*')

# Simple tests to prove pragma_regex's cases. Returns True (after printing a
# confirmation) when every case behaves as documented, otherwise False.
#
def text_match_pragma():
    if (pragma_regex.match('#pragma once') and
            pragma_regex.match('  #pragma once') and
            pragma_regex.match('#  pragma once') and
            pragma_regex.match('#pragma   once') and
            pragma_regex.match('#pragma once // comment')):
        if (not pragma_regex.match('//#pragma once') and
                not pragma_regex.match('/*#pragma once*/')):
            print('#pragma once match valid')
            return True
    return False

# Finds 'file'. First the currently processing file's 'parent' path is looked at
# for a match, followed by the list of 'roots' (in the order they were given),
# returning a valid Path in canonical form. If no match is found None is
# returned.
#
def resolve_include(parent: Path, file: str):
    candidate = parent.joinpath(file).resolve()
    if (candidate.is_file()):
        return candidate
    for root in roots:
        candidate = root.joinpath(file).resolve()
        if (candidate.is_file()):
            return candidate
    return None

# Writes 'line' to the open file 'destn' (or stdout).
#
def write_line(line):
    print(line, file=destn)

# Logs 'line' to stderr.
#
def log_line(line):
    print(line, file=sys.stderr)

# Copies 'file' (a Path) to the output line by line. Each quoted #include is
# replaced by the contents of the file it resolves to, wrapped in start/ended
# marker comments; a file that was already inlined is replaced by a "skipping"
# marker instead. "#pragma once" lines are dropped unless 'keep_pragma' is set.
#
# An include that cannot be resolved is logged and its directive is written
# through untouched, leaving it to the compiler to deal with. If 'file' itself
# is not an existing file an error is logged and nothing is written.
#
def add_file(file):
    if (not (isinstance(file, Path) and file.is_file())):
        log_line(f'Error: Unable to find: {file}')
        return
    log_line(f'Processing: {file}')
    with file.open('r') as opened:
        for line in opened:
            line = line.rstrip('\n')
            match_include = include_regex.match(line)
            if (not match_include):
                if (keep_pragma or not pragma_regex.match(line)):
                    write_line(line)
                continue
            inc_name = match_include.group(1)
            resolved = resolve_include(file.parent, inc_name)
            if (resolved is None):
                # Never record or recurse into None: keep the directive so
                # the output still references the header
                log_line(f'Error: Unable to find: {inc_name}')
                write_line(line)
            elif (resolved not in found):
                # The file was not previously encountered
                found.add(resolved)
                write_line(f'/**** start inlining {inc_name} ****/')
                add_file(resolved)
                write_line(f'/**** ended inlining {inc_name} ****/')
            else:
                write_line(f'/**** skipping file: {inc_name} ****/')

# Command line entry point. Parses 'argv' (sys.argv[1:] when None), runs the
# amalgamation and returns the process exit status: 0 on success, 1 if the
# input file does not exist. An invalid root path raises FileNotFoundError.
#
def main(argv=None):
    global destn, keep_pragma

    parser = argparse.ArgumentParser(description='Amalgamate Tool', epilog=f'example: {sys.argv[0]} -r ../my/path -r ../other/path -o out.c in.c')
    parser.add_argument('-r', '--root', action='append', type=Path, help='file root search path')
    parser.add_argument('-x', '--exclude', action='append', help='file to completely exclude from inlining')
    parser.add_argument('-k', '--keep', action='append', help='file to exclude from inlining but keep the include directive')
    parser.add_argument('-p', '--pragma', action='store_true', default=False, help='keep any "#pragma once" directives (removed by default)')
    parser.add_argument('-o', '--output', type=Path, help='output file (otherwise stdout)')
    parser.add_argument('input', type=Path, help='input file')
    args = parser.parse_args(argv)

    # Start every run from a clean slate
    roots.clear()
    found.clear()
    keep_pragma = args.pragma

    if (args.exclude or args.keep):
        # NOTE(review): these options are accepted but nothing acts on them
        # yet; say so rather than silently producing a different result.
        log_line('Warning: -x/--exclude and -k/--keep are not implemented and will be ignored')

    # Resolve all of the root paths upfront (we'll halt here on invalid roots)
    for path in (args.root or []):
        resolved = path.resolve(strict=True)
        if (resolved not in roots):
            roots.append(resolved)

    # Validate the input before the output is opened (and truncated)
    if (not args.input.is_file()):
        log_line(f'Error: Unable to find: {args.input}')
        return 1

    # '-' is treated as stdout, as argparse.FileType did
    to_stdout = args.output is None or str(args.output) == '-'
    destn = sys.stdout if to_stdout else args.output.open('w')
    try:
        add_file(args.input)
    finally:
        # Only close what was opened here; stdout belongs to the interpreter
        if (not to_stdout):
            destn.close()
        destn = None
    return 0


if __name__ == '__main__':
    sys.exit(main())
a/build/single_file_libs/create_single_file_decoder.sh b/build/single_file_libs/create_single_file_decoder.sh index b5f5613ae..1c8841d18 100755 --- a/build/single_file_libs/create_single_file_decoder.sh +++ b/build/single_file_libs/create_single_file_decoder.sh @@ -5,7 +5,12 @@ ZSTD_SRC_ROOT="../../lib" # Amalgamate the sources echo "Amalgamating files... this can take a while" -./combine.sh -r "$ZSTD_SRC_ROOT" -o zstddeclib.c zstddeclib-in.c +# Using the faster Python script if we have 3.8 or higher +if python3 -c 'import sys; assert sys.version_info >= (3,8)' 2>/dev/null; then + ./combine.py -r "$ZSTD_SRC_ROOT" -o zstddeclib.c zstddeclib-in.c +else + ./combine.sh -r "$ZSTD_SRC_ROOT" -o zstddeclib.c zstddeclib-in.c +fi # Did combining work? if [ $? -ne 0 ]; then echo "Combine script: FAILED" diff --git a/build/single_file_libs/create_single_file_library.sh b/build/single_file_libs/create_single_file_library.sh index 6f38526d5..9b2f22a9d 100755 --- a/build/single_file_libs/create_single_file_library.sh +++ b/build/single_file_libs/create_single_file_library.sh @@ -5,7 +5,12 @@ ZSTD_SRC_ROOT="../../lib" # Amalgamate the sources echo "Amalgamating files... this can take a while" -./combine.sh -r "$ZSTD_SRC_ROOT" -o zstd.c zstd-in.c +# Using the faster Python script if we have 3.8 or higher +if python3 -c 'import sys; assert sys.version_info >= (3,8)' 2>/dev/null; then + ./combine.py -r "$ZSTD_SRC_ROOT" -o zstd.c zstd-in.c +else + ./combine.sh -r "$ZSTD_SRC_ROOT" -o zstd.c zstd-in.c +fi # Did combining work? if [ $? -ne 0 ]; then echo "Combine script: FAILED"