mirror of
				https://sourceware.org/git/glibc.git
				synced 2025-10-30 10:45:40 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			218 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			218 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/python3
 | |
| # Check that use of symbols declared in a given header does not result
 | |
| # in any symbols being brought in that are not reserved with external
 | |
| # linkage for the given standard.
 | |
| # Copyright (C) 2014-2023 Free Software Foundation, Inc.
 | |
| # This file is part of the GNU C Library.
 | |
| #
 | |
| # The GNU C Library is free software; you can redistribute it and/or
 | |
| # modify it under the terms of the GNU Lesser General Public
 | |
| # License as published by the Free Software Foundation; either
 | |
| # version 2.1 of the License, or (at your option) any later version.
 | |
| #
 | |
| # The GNU C Library is distributed in the hope that it will be useful,
 | |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
| # Lesser General Public License for more details.
 | |
| #
 | |
| # You should have received a copy of the GNU Lesser General Public
 | |
| # License along with the GNU C Library; if not, see
 | |
| # <https://www.gnu.org/licenses/>.
 | |
| 
 | |
| import argparse
 | |
| from collections import defaultdict
 | |
| import os.path
 | |
| import re
 | |
| import subprocess
 | |
| import sys
 | |
| import tempfile
 | |
| 
 | |
| import glibcconform
 | |
| 
 | |
# Symbols that are tolerated for the time being even though they are
# not reserved for the standard being checked.  main() adds them to the
# set of acceptable names read from the --stdsyms file.
WHITELIST = {
    # Bug 17576: stdin, stdout, stderr are only reserved with external
    # linkage when stdio.h is included (and possibly not then), not
    # generally.
    'stdin',
    'stdout',
    'stderr',
    # Bug 18442: re_syntax_options is wrongly brought in by regcomp and
    # used by re_comp.
    're_syntax_options',
}
 | |
| 
 | |
| 
 | |
def list_syms(filename):
    """Describe the GLOBAL and WEAK symbols found in saved readelf -s output.

    FILENAME names a text file holding the output.  The result is a list
    of tuples (object, symbol, binding, defined), in file order, where:

    * object is the last '/'-separated component of the most recent
      'File: ' line, or FILENAME itself if no such line has been seen;
    * symbol is the symbol name (eighth whitespace-separated column);
    * binding is 'GLOBAL' or 'WEAK' (fifth column);
    * defined is False when the seventh column is 'UND', True otherwise.

    Rows with fewer than eight columns, with any other binding, or whose
    name is not made up solely of ASCII letters, digits and underscores
    are ignored.
    """
    # Architecture-specific st_other bits are printed inside [] and
    # would otherwise shift the column positions.
    bracketed = re.compile(r'\[.*?\]')
    plain_name = re.compile('[A-Za-z0-9_]+')
    file_marker = 'File: '
    wanted_bindings = ('GLOBAL', 'WEAK')

    symbols = []
    current_object = filename
    with open(filename, 'r') as listing:
        for raw in listing:
            text = raw.rstrip()
            if text.startswith(file_marker):
                # Remember only the base name of the object being listed.
                current_object = text[len(file_marker):].split('/')[-1]
                continue
            columns = bracketed.sub('', text).split()
            if len(columns) < 8:
                continue
            binding, section, name = columns[4], columns[6], columns[7]
            if binding in wanted_bindings and plain_name.fullmatch(name):
                symbols.append(
                    (current_object, name, binding, section != 'UND'))
    return symbols
 | |
| 
 | |
| 
 | |
def main():
    """Run the link-time namespace check and exit with its status.

    Reads the command-line options, works out which ELF symbols are
    pulled in when the functions declared by the header are referenced,
    prints the provenance of every such symbol that neither starts with
    an underscore nor appears in the allowed list, and exits with status
    1 if anything was printed and 0 otherwise.
    """
    options = _parse_command_line()

    # Names that are acceptable: one per line in the --stdsyms file,
    # plus the temporarily tolerated WHITELIST entries.
    with open(options.stdsyms, 'r') as allowed_file:
        allowed = {entry.rstrip() for entry in allowed_file}
    allowed.update(WHITELIST)

    # Index the GLOBAL and WEAK symbols defined or used by the standard
    # libraries.
    # Per object: every symbol it mentions, except weak definitions.
    mentioned_by = defaultdict(list)
    # Per object: its strong undefined references.
    needs = defaultdict(list)
    # Per symbol: the objects defining it (strongly or weakly).
    providers = defaultdict(list)
    for obj, symbol, binding, is_defined in list_syms(options.libsyms):
        is_global = binding == 'GLOBAL'
        if is_defined:
            providers[symbol].append(obj)
            if is_global:
                mentioned_by[obj].append(symbol)
        else:
            mentioned_by[obj].append(symbol)
            if is_global:
                needs[obj].append(symbol)

    # Work out which ELF-level symbols are brought in by using the
    # C-level symbols declared in the given header.
    #
    # The approach is heuristic, so both false positives and false
    # negatives are possible:
    #
    # * Every undefined symbol is treated as being of significance,
    # although (a) the standard library definition may be weak and so
    # overridable by a user definition, and (b) the symbol may only be
    # used conditionally, and not when the program restricts itself to
    # standard functionality.
    #
    # * A symbol reference that is only brought in when the user uses a
    # data symbol, rather than a function, from the standard library is
    # not detected.
    #
    # * A symbol reference that is only brought in by crt*.o or libgcc
    # is not detected.
    #
    # * A symbol reference that is only brought in because
    # __builtin_foo in a standard macro is compiled into a call to foo
    # is not detected.
    #
    # * Header inclusions ought to be compiled several times with
    # different options such as -O2, -D_FORTIFY_SOURCE and
    # -D_FILE_OFFSET_BITS=64 to see which symbols are undefined in each
    # case; this is not yet implemented.
    #
    # * Symbols referenced through macros are found on the assumption
    # that an internal function called by a macro must also be declared
    # in the header.  The header may, however, declare
    # implementation-namespace functions that no standard macro in it
    # calls, giving false positives for symbols brought in only through
    # those functions.
    #
    # * Namespace issues can affect dynamic as well as static linking,
    # when one shared library calls another or a call within a shared
    # library goes through a PLT entry; such issues are only detected
    # here if the same issue also applies to static linking.
    reached = {}
    pulled_in = set()
    every_undef = {}
    frontier = {}
    for name in _undefined_in_header_object(options):
        trail = '[initial] %s' % name
        reached[name] = every_undef[name] = frontier[name] = trail

    # Follow undefined references from object to object until no new
    # undefined symbol turns up.  Each object is expanded at most once.
    while frontier:
        discovered = {}
        for wanted, trail in sorted(frontier.items()):
            for obj in providers[wanted]:
                if obj in pulled_in:
                    continue
                pulled_in.add(obj)
                for mention in mentioned_by[obj]:
                    # Keep the first explanation recorded for a symbol.
                    reached.setdefault(
                        mention, '%s -> [%s] %s' % (trail, obj, mention))
                for need in needs[obj]:
                    if need in every_undef:
                        continue
                    need_trail = '%s -> [%s] %s' % (trail, obj, need)
                    every_undef[need] = discovered[need] = need_trail
        frontier = discovered

    offenders = [name for name in sorted(reached)
                 if not name.startswith('_') and name not in allowed]
    for name in offenders:
        print(reached[name])
    sys.exit(1 if offenders else 0)


def _parse_command_line():
    """Build the option parser and return the parsed command line."""
    parser = argparse.ArgumentParser(description='Check link-time namespace.')
    option_table = (
        ('--header', 'HEADER', 'name of header'),
        ('--standard', 'STD', 'standard to use when processing header'),
        ('--cc', 'CC', 'C compiler to use'),
        ('--flags', 'CFLAGS', 'Compiler flags to use with CC'),
        ('--stdsyms', 'FILE', 'File with list of standard symbols'),
        ('--libsyms', 'FILE', 'File with symbol information from libraries'),
        ('--readelf', 'READELF', 'readelf program to use'),
    )
    for flag, placeholder, description in option_table:
        parser.add_argument(flag, metavar=placeholder, help=description)
    return parser.parse_args()


def _undefined_in_header_object(options):
    """Return the strong undefined symbols of a probe object for the header.

    A C file taking the address of every function reported by
    glibcconform.list_exported_functions for the header is compiled in
    a temporary directory, readelf is run on the resulting object, and
    the names of its GLOBAL undefined symbols are returned in the order
    list_syms reports them.
    """
    compiler = '%s %s' % (options.cc, options.flags)
    exported = glibcconform.list_exported_functions(
        compiler, options.standard, options.header)
    with tempfile.TemporaryDirectory() as scratch:
        source_path = os.path.join(scratch, 'undef.c')
        object_path = os.path.join(scratch, 'undef.o')
        table_path = os.path.join(scratch, 'undef.sym')
        references = '\n'.join(
            'void *__glibc_test_%s = (void *) &%s;' % (fn, fn)
            for fn in sorted(exported))
        source_text = '#include <%s>\n%s\n' % (options.header, references)
        with open(source_path, 'w') as source:
            source.write(source_text)
        compile_cmd = ('%s %s -D_ISOMAC %s -c %s -o %s'
                       % (options.cc, options.flags,
                          glibcconform.CFLAGS[options.standard],
                          source_path, object_path))
        subprocess.check_call(compile_cmd, shell=True)
        readelf_cmd = ('LC_ALL=C %s -W -s %s > %s'
                       % (options.readelf, object_path, table_path))
        subprocess.check_call(readelf_cmd, shell=True)
        return [name
                for _, name, binding, is_defined in list_syms(table_path)
                if binding == 'GLOBAL' and not is_defined]
 | |
| 
 | |
| 
 | |
# Run the check only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
 |