diff --git a/src/PyReprism/languages/kotlin.py b/src/PyReprism/languages/kotlin.py
index 3035406..a8e34e9 100644
--- a/src/PyReprism/languages/kotlin.py
+++ b/src/PyReprism/languages/kotlin.py
@@ -16,26 +16,60 @@ def keywords() -> list:
         return keyword
 
     @staticmethod
-    def comment_regex():
+    def comment_regex() -> re.Pattern:
         pattern = re.compile(r'(?P<comment>//.*?$|/\*.*?\*/|/\*.*?$|^.*?\*/|[{}]+)|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"{}]*)', re.DOTALL | re.MULTILINE)
         return pattern
 
     @staticmethod
-    def number_regex():
+    def number_regex() -> re.Pattern:
         pattern = re.compile(r'\b(?:0[bx][\da-fA-F]+|\d+(?:\.\d+)?(?:e[+-]?\d+)?[fFL]?)\b')
         return pattern
 
     @staticmethod
-    def operator_regex():
+    def operator_regex() -> re.Pattern:
         pattern = re.compile(r'\+[+=]?|-[-=>]?|==?=?|!(?:!|==?)?|[\/*%<>]=?|[?:]:?|\.\.|&&|\|\||\b(?:and|inv|or|shl|shr|ushr|xor)\b')
         return pattern
 
     @staticmethod
-    def keywords_regex():
+    def keywords_regex() -> re.Pattern:
         return re.compile(r'\b(' + '|'.join(Kotlin.keywords()) + r')\b')
 
+    @staticmethod
+    def delimiters_regex() -> re.Pattern:
+        """
+        Compile and return a regular expression pattern to identify Kotlin language delimiters.
+
+        This function generates a regular expression that matches Kotlin language delimiters, which include parentheses `()`, brackets `[]`, braces `{}`, commas `,`, colons `:`, periods `.`, semicolons `;`, angle brackets `<`, `>`, and the question mark `?`.
+
+        :return: A compiled regex pattern to match Kotlin delimiters.
+        :rtype: re.Pattern
+        """
+        return re.compile(r'[()\[\]{}.,:;<>?]')
+
+    @staticmethod
+    def boolean_regex() -> re.Pattern:
+        """
+        Compile and return a regular expression pattern to identify Kotlin boolean literals.
+
+        This function generates a regular expression that matches the Kotlin boolean literals `true`, `false`, and the special constant `null`.
+
+        :return: A compiled regex pattern to match Kotlin boolean literals and `null`.
+        :rtype: re.Pattern
+        """
+        return re.compile(r'\b(?:true|false|null)\b')
+
     @staticmethod
     def remove_comments(source_code: str, isList: bool = False) -> str:
+        """
+        Remove comments from the provided Kotlin source code string.
+
+        :param str source_code: The Kotlin source code from which to remove comments.
+        :param bool isList: When False (the default), return the comment-free source as one stripped string; when True, skip that shortcut and fall through to the original loop-based implementation below.
+        :return: The source code with all comments removed.
+        :rtype: str
+        """
+        if not isList:
+            return Kotlin.comment_regex().sub(lambda match: match.group('noncomment') if match.group('noncomment') else '', source_code).strip()
         result = []
         for match in Kotlin.comment_regex().finditer(source_code):
             if match.group('noncomment'):
@@ -45,5 +79,5 @@ def remove_comments(source_code: str, isList: bool = False) -> str:
         return ''.join(result)
 
     @staticmethod
-    def remove_keywords(source: str):
+    def remove_keywords(source: str) -> str:
         return re.sub(re.compile(Kotlin.keywords_regex()), '', source)
diff --git a/src/PyReprism/languages/rust.py b/src/PyReprism/languages/rust.py
index b3ba746..38e7176 100644
--- a/src/PyReprism/languages/rust.py
+++ b/src/PyReprism/languages/rust.py
@@ -2,13 +2,13 @@
 from PyReprism.utils import extension
 
 
-class Dart:
+class Rust:
     def __init__():
         pass
 
     @staticmethod
     def file_extension() -> str:
-        return extension.dart
+        return extension.rust
 
     @staticmethod
     def keywords() -> list:
@@ -16,28 +16,62 @@ def keywords() -> list:
         return keyword
 
     @staticmethod
-    def comment_regex():
+    def comment_regex() -> re.Pattern:
         pattern = re.compile(r'(?P<comment>//.*?$|///.*?$|/\*[\s\S]*?\*/|/\*.*?$|^.*?\*/)|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"]*)', re.DOTALL | re.MULTILINE)
         return pattern
 
     @staticmethod
-    def number_regex():
+    def number_regex() -> re.Pattern:
         pattern = re.compile(r'\b(?:0x[\dA-Fa-f](?:_?[\dA-Fa-f])*|0o[0-7](?:_?[0-7])*|0b[01](?:_?[01])*|(\d(?:_?\d)*)?\.?\d(?:_?\d)*(?:[Ee][+-]?\d+)?)(?:_?(?:[iu](?:8|16|32|64)?|f32|f64))?\b')
         return pattern
 
     @staticmethod
-    def operator_regex():
+    def operator_regex() -> re.Pattern:
         pattern = re.compile(r'[-+*\/%!^]=?|=[=>]?|@|&[&=]?|\|[|=]?|<>?=?')
         return pattern
 
     @staticmethod
-    def keywords_regex():
-        return re.compile(r'\b(' + '|'.join(Dart.keywords()) + r')\b')
+    def keywords_regex() -> re.Pattern:
+        return re.compile(r'\b(' + '|'.join(Rust.keywords()) + r')\b')
+
+    @staticmethod
+    def boolean_regex() -> re.Pattern:
+        """
+        Compile and return a regular expression pattern to identify Rust boolean literals.
+
+        This function generates a regular expression that matches the Rust boolean literals `true`, `false`, and the special constant `None`.
+
+        :return: A compiled regex pattern to match Rust boolean literals and `None`.
+        :rtype: re.Pattern
+        """
+        return re.compile(r'\b(?:true|false|None)\b')
+
+    @staticmethod
+    def rust_delimiters_regex() -> re.Pattern:
+        """
+        Compile and return a regular expression pattern to identify Rust language delimiters.
+
+        This function generates a regular expression that matches Rust language delimiters, which include parentheses `()`, brackets `[]`, braces `{}`, commas `,`, colons `:`, periods `.`, semicolons `;`, angle brackets `<`, `>`, and the question mark `?`.
+
+        :return: A compiled regex pattern to match Rust delimiters.
+        :rtype: re.Pattern
+        """
+        return re.compile(r'[()\[\]{}.,:;<>?]')
 
     @staticmethod
     def remove_comments(source_code: str, isList: bool = False) -> str:
+        """
+        Remove comments from the provided Rust source code string.
+
+        :param str source_code: The Rust source code from which to remove comments.
+        :param bool isList: When False (the default), return the comment-free source as one stripped string; when True, skip that shortcut and fall through to the original loop-based implementation below.
+        :return: The source code with all comments removed.
+        :rtype: str
+        """
+        if not isList:
+            return Rust.comment_regex().sub(lambda match: match.group('noncomment') if match.group('noncomment') else '', source_code).strip()
         result = []
-        for match in Dart.comment_regex().finditer(source_code):
+        for match in Rust.comment_regex().finditer(source_code):
             if match.group('noncomment'):
                 result.append(match.group('noncomment'))
         if isList:
@@ -45,5 +79,5 @@ def remove_comments(source_code: str, isList: bool = False) -> str:
         return ''.join(result)
 
     @staticmethod
-    def remove_keywords(source: str):
-        return re.sub(re.compile(Dart.keywords_regex()), '', source)
+    def remove_keywords(source: str) -> str:
+        return re.sub(re.compile(Rust.keywords_regex()), '', source)