#Portable C++11 implementation and test of the BSD genassym and genoffset
#functionality. In a traditional BSD setting, genassym/genoffset are AWK based
#Unix shell scripts that rely on parsing the output from the "nm" command.
#Here we instead implement those scripts as short C++11 programs which tap
#into the functionality of the LLVM libraries, thereby reducing the need for
#the traditional Unix "Pandora's box" of command line tools (nm and friends).
#These short C++ programs will run on any platform supported by LLVM
#(BSD/Linux/Windows), and will help us build the BSD kernel on ANY host
#platform supported by cmake using ANY supported toolchain that is able to
#compile the BSD sources (for example "Visual Studio with a clang frontend",
#"standalone clang" or "gcc"). This in turn allows us to use a wide range of
#BSD/Linux/Windows tools like static analyzers etc on the BSD kernel source
#code which do not require us to actually RUN the kernel on the final target.

#CMake 3.9 is the oldest release that can hand the object files of an OBJECT
#library to a custom command, which is how the test data reaches the
#genassym/genoffset programs further down in this file. Requesting at least
#3.5 is also necessary for CMake 4.x, which refuses to configure projects
#that ask for compatibility with older releases.
cmake_minimum_required(VERSION 3.9)
project (bsdassym)

#We can run some simple sanity checks on the built genassym/genoffset
#programs controlled by the BUILD_TESTING option, which defaults to ON.
#See https://stackoverflow.com/questions/50468620/what-does-enable-testing-do-in-cmake
include(CTest) #This introduces the BUILD_TESTING option, which defaults to ON

#We will use the LLVM library to read any object file format output by
#compilers that are able to compile the BSD kernel. Using LLVM makes the
#genassym and genoffset programs independent of the toolchain we use because
#LLVM aims to read all object file formats. We could actually go so far as to
#switch toolchain without rebuilding the genassym/genoffset programs. They
#depend only on the LLVM libraries and the host C++ runtime/STL.
find_package (LLVM CONFIG REQUIRED)
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
message(STATUS "LLVM include directory is ${LLVM_INCLUDE_DIRS}")
llvm_map_components_to_libnames(LLVM_LIBS support core object)

#The genassym program aims to be equivalent to the traditional BSD shell
#script BSDROOT/sys/kern/genassym.sh. It generates a header file with
#numerical constants as a series of defines based on the content of a given
#object file. That object file is in turn generated from a C source file (see
#BSDROOT/sys/amd64/amd64/genassym.c) by the (cross)compiler building the BSD
#kernel. The C file contains a number of ASSYM declarations like so:
#ASSYM (symbol_name,0x1234567812345678LL);
#relying on the ASSYM macro defined in BSDROOT/sys/sys/assym.h.
add_executable (genassym genassym.cpp)
target_include_directories(genassym PRIVATE ${LLVM_INCLUDE_DIRS})
#LLVM_DEFINITIONS is a single space-separated string of -D flags. Split it
#into a proper list so that every definition is passed as its own item.
separate_arguments(genassym_llvm_definitions UNIX_COMMAND "${LLVM_DEFINITIONS}")
target_compile_definitions(genassym PRIVATE ${genassym_llvm_definitions})
target_link_libraries (genassym PRIVATE ${LLVM_LIBS})

#Use genassym to generate a header file from_obj_via_genassym.h containing
#some symbols obtained from the object file compiled from testdata_genassym.c.
#The content of the generated file will be verified by the test program
#test_genassym.
#MSVC does not generate correct object code for the test data.
#See the file strange.txt. Instead of using the object file generated by
#compiling testdata_genassym.c, use an object file that we have compiled
#manually with clang.
if (MSVC)
  add_custom_command (
    OUTPUT ${CMAKE_BINARY_DIR}/from_obj_via_genassym.h
    COMMAND genassym
            ${CMAKE_SOURCE_DIR}/testdata_genassym.obj
            ${CMAKE_BINARY_DIR}/from_obj_via_genassym.h
    DEPENDS genassym ${CMAKE_SOURCE_DIR}/testdata_genassym.obj
    VERBATIM
  )
else()
  #The OBJECT library testdata_genassym holds exactly one object file, so the
  #generator expression below expands to the single input path genassym needs.
  add_custom_command (
    OUTPUT ${CMAKE_BINARY_DIR}/from_obj_via_genassym.h
    COMMAND genassym
            $<TARGET_OBJECTS:testdata_genassym>
            ${CMAKE_BINARY_DIR}/from_obj_via_genassym.h
    DEPENDS genassym testdata_genassym
    VERBATIM
  )
endif()

#Test program which will try to use the header file from_obj_via_genassym.h
#generated by genassym.
add_executable (test_genassym test_genassym.cpp ${CMAKE_BINARY_DIR}/from_obj_via_genassym.h)
target_include_directories (test_genassym PRIVATE ${CMAKE_BINARY_DIR}) #In order to find the generated file from_obj_via_genassym.h
add_test (NAME test_genassym COMMAND test_genassym)

#Program to generate a header file containing a simplified C struct
#declaration containing only selected struct attributes based on the content
#of an object file. The object file is in turn generated from a C source
#file that contains a number of OFFSYM declarations for those attributes that
#should be included in the simplified struct. Fillers are used to make up for
#attributes that are not included, so that the resulting simplified struct
#is compatible with the full struct.
#OFFSYM(attribute_name, parent_type, attribute_data_type)
#See the file BSDROOT/sys/kern/genoffset.c. The concept of the genoffset
#program is the same as that of the genassym program, just that it outputs
#different information based on the OFFSYM macro defined in
#BSDROOT/sys/sys/assym.h and not the ASSYM() macro that genassym relies on.
add_executable (genoffset genoffset.cpp)
target_include_directories(genoffset PRIVATE ${LLVM_INCLUDE_DIRS})
#LLVM_DEFINITIONS is a single space-separated string of -D flags. Split it
#into a proper list so that every definition is passed as its own item.
separate_arguments(genoffset_llvm_definitions UNIX_COMMAND "${LLVM_DEFINITIONS}")
target_compile_definitions(genoffset PRIVATE ${genoffset_llvm_definitions})
target_link_libraries (genoffset PRIVATE ${LLVM_LIBS})

#Use genoffset to generate a header file "from_obj_via_genoffset.h" containing
#a simplified version of the structure embedded in the single object file in
#the target testdata_genoffset. The content of the generated file will be
#verified by the test program test_genoffset.
#MSVC does not generate correct object code for the test data.
#Instead of using the object file generated by compiling testdata_genoffset.c,
#use an object file that we have compiled manually with clang.
if (MSVC)
  add_custom_command (
    OUTPUT ${CMAKE_BINARY_DIR}/from_obj_via_genoffset.h
    COMMAND genoffset
            ${CMAKE_SOURCE_DIR}/testdata_genoffset.obj
            ${CMAKE_BINARY_DIR}/from_obj_via_genoffset.h
    DEPENDS genoffset ${CMAKE_SOURCE_DIR}/testdata_genoffset.obj
    VERBATIM
  )
else()
  #The OBJECT library testdata_genoffset holds exactly one object file, so the
  #generator expression below expands to the single input path genoffset needs.
  add_custom_command (
    OUTPUT ${CMAKE_BINARY_DIR}/from_obj_via_genoffset.h
    COMMAND genoffset
            $<TARGET_OBJECTS:testdata_genoffset>
            ${CMAKE_BINARY_DIR}/from_obj_via_genoffset.h
    DEPENDS genoffset testdata_genoffset
    VERBATIM
  )
endif()

#Test program which will compare offsets of a struct generated by genoffset
#with the original struct that was passed to genoffset.
add_executable (test_genoffset test_genoffset.cpp ${CMAKE_BINARY_DIR}/from_obj_via_genoffset.h)
target_include_directories (test_genoffset PRIVATE ${CMAKE_BINARY_DIR}) #In order to find the generated file from_obj_via_genoffset.h
add_test (NAME test_genoffset COMMAND test_genoffset)

#Generate a couple of object files containing test data that we can feed to
#genassym/genoffset. Compiling testdata_genassym.c produces an object file
#containing test data suitable for the genassym program. Compiling
#testdata_genoffset.c produces an object file containing test data suitable
#for the genoffset program.
#Each of the OBJECT libraries below should contain just a SINGLE object file
#which will be fed to genassym/genoffset in order to verify proper operation.
#NOTE: The generated test data may be erroneous when compiled with Visual Studio!!!
#See the file strange.txt for explanation. This should not matter in practice
#because compiling the BSD kernel requires clang, and clang compiles the
#affected object files correctly.
add_library (testdata_genassym OBJECT testdata_genassym.c to_obj_aimed_at_genassym.h assym.h)
add_library (testdata_genoffset OBJECT testdata_genoffset.c to_obj_aimed_at_genoffset.h assym.h)

#Install the two generator programs together with the header that declares
#the macros used in their input source files.
install (TARGETS genassym genoffset RUNTIME DESTINATION bin)
install (FILES assym.h DESTINATION include)