Ⅰ. 프로그래밍
정규표현식을 이용하여 C 언어에서 함수 추출해보기! 2탄!
agencies
2024. 12. 17. 00:44
import re
# Reserved C words that may be followed by "(". They can never be function
# names, but constructs such as "else if (x) {" or "else switch (x) {" have the
# same "word word(...) {" shape as a definition and would otherwise be reported.
_C_KEYWORDS = frozenset({'if', 'for', 'while', 'switch', 'return', 'sizeof'})

# Matches a function definition header up to and including its opening brace
# and captures the function name. Compiled once at import time.
_DEFINITION_PATTERN = re.compile(
    r'\b(?:static|extern|inline)?\s*'  # Optional specifiers
    r'(?:__attribute__\s*\(\([^)]*\)\)\s*)*'  # Optional __attribute__ specifier
    r'(?:void|int|long|float|double|char|short|unsigned|signed|struct\s+\w+|enum\s+\w+|[a-zA-Z_]\w*[*]*)\s+'  # Return type or complex type
    r'(?:__attribute__\s*\(\([^)]*\)\)\s*)*'  # Another possible __attribute__ specifier
    r'([a-zA-Z_]\w*)\s*\([^)]*\)\s*\{',  # Function name with arguments and opening brace
    re.DOTALL,
)


def extract_user_defined_function_names(file_path):
    """Return the sorted names of the functions defined in a C source file.

    A definition is recognised purely by regular expression: an optional
    storage specifier and ``__attribute__`` lists, a return type, the function
    name, a parameter list and an opening ``{``. Calls and prototypes are not
    reported because they are not followed by ``{``.

    Known limits of the pattern: the parameter list may not contain ``)``
    (so function-pointer parameters are not matched), and the raw text is
    scanned as-is, so definition-like text inside comments or string literals
    is reported as well.

    Args:
        file_path: Path of the C source file to scan.

    Returns:
        A sorted list of unique function names. The same list is also printed
        to standard output.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default,
    # and replace undecodable bytes so that a stray byte (typically inside a
    # comment written in a legacy encoding) does not abort the whole scan.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        code = file.read()

    # A set removes duplicates, e.g. the same name defined in several #ifdef
    # branches.
    defined_functions = set(_DEFINITION_PATTERN.findall(code))

    # Drop control-flow keywords that only look like definitions.
    filtered_functions = sorted(defined_functions - _C_KEYWORDS)

    print(filtered_functions)
    return filtered_functions
# Run the extractor on ./test.c; the path is resolved against the current
# working directory.
extract_user_defined_function_names(file_path='./test.c')
이전에는 setup_seccomp 같은 함수를 찾지 못했는데, 지금은 잘 잡습니다.