agencies

정규표현식을 이용하여 C 언어에서 함수 추출해보기! 2탄! 본문

Ⅰ. 프로그래밍

정규표현식을 이용하여 C 언어에서 함수 추출해보기! 2탄!

agencies 2024. 12. 17. 00:44

함수추출.py
0.00MB

 

 

 

import re

def extract_user_defined_function_names(file_path):
    """Return the sorted names of functions defined in a C source file.

    The file is scanned with two regular expressions that share the same
    "specifier / attribute / return type / name" prefix:

    * a loose pattern that matches ``<type> <name> (``, and
    * a strict pattern that additionally requires ``( ... ) {`` after the
      name, i.e. something that looks like a function *definition*.

    A name is reported only when both patterns captured it. C keywords that
    the patterns can mistake for a function name (for example ``for`` in
    ``// loop\\n for (...) {`` or ``if`` in ``else if (...) {``) are removed
    from the result.

    This is a heuristic, not a C parser: the parameter list is matched with
    ``[^)]*``, so definitions whose parameter list itself contains ``)``
    are not detected.

    Args:
        file_path: Path of the C source file to scan.

    Returns:
        A sorted list of the detected function names. The same list is also
        printed to standard output.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Reserved words can never be function names in C, yet
    # "<identifier> <keyword> (...) {" satisfies the definition pattern.
    control_keywords = frozenset(
        {'if', 'else', 'for', 'while', 'do', 'switch', 'return', 'sizeof'}
    )

    # Decode explicitly so the result does not depend on the platform's
    # default encoding; undecodable bytes are replaced instead of raising.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        code = file.read()

    # Zero or more GCC-style "__attribute__((...))" specifiers.
    attribute = r'(?:__attribute__\s*\(\([^)]*\)\)\s*)*'

    # Shared prefix: optional storage specifier, optional attributes, a
    # return type, optional attributes again, then the captured name.
    signature_prefix = (
        r'\b(?:static|extern|inline)?\s*'
        + attribute
        + r'(?:void|int|long|float|double|char|short|unsigned|signed'
          r'|struct\s+\w+|enum\s+\w+|[a-zA-Z_]\w*[*]*)\s+'
        + attribute
        + r'([a-zA-Z_]\w*)\s*'
    )

    # Loose form: the name only has to be followed by an opening parenthesis.
    signature_pattern = re.compile(signature_prefix + r'\(', re.DOTALL)
    # Strict form: a full parameter list followed by the body's opening brace.
    definition_pattern = re.compile(
        signature_prefix + r'\([^)]*\)\s*\{', re.DOTALL
    )

    signature_names = set(signature_pattern.findall(code))
    defined_names = set(definition_pattern.findall(code))

    # Keep names seen by both patterns, then drop the C keywords.
    filtered_functions = sorted(
        (defined_names & signature_names) - control_keywords
    )

    print(filtered_functions)
    return filtered_functions

# Run the extractor on the sample C file only when this file is executed as a
# script, so that importing it does not try to read './test.c'.
if __name__ == '__main__':
    extract_user_defined_function_names('./test.c')

 

이전 버전에서는 setup_seccomp 같은 함수를 찾아내지 못했는데, 이제는 잘 잡아냅니다.