Skip to content

Commit

Permalink
feat: comments, boolean and delimiters regex
Browse files Browse the repository at this point in the history
  • Loading branch information
danielogen committed Aug 12, 2024
1 parent 536238d commit d812f3f
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 15 deletions.
42 changes: 37 additions & 5 deletions src/PyReprism/languages/kotlin.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,58 @@ def keywords() -> list:
return keyword

@staticmethod
def comment_regex():
def comment_regex() -> re.Pattern:
pattern = re.compile(r'(?P<comment>//.*?$|/\*.*?\*/|/\*.*?$|^.*?\*/|[{}]+)|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"{}]*)', re.DOTALL | re.MULTILINE)
return pattern

@staticmethod
def number_regex():
def number_regex() -> re.Pattern:
pattern = re.compile(r'\b(?:0[bx][\da-fA-F]+|\d+(?:\.\d+)?(?:e[+-]?\d+)?[fFL]?)\b')
return pattern

@staticmethod
def operator_regex():
def operator_regex() -> re.Pattern:
pattern = re.compile(r'+[+=]?|-[-=>]?|==?=?|!(?:!|==?)?|[\/*%<>]=?|[?:]:?|\.\.|&&|\|\||\b(?:and|inv|or|shl|shr|ushr|xor)\b')
return pattern

@staticmethod
def keywords_regex() -> re.Pattern:
    """
    Compile and return a regular expression pattern to identify Kotlin keywords.

    The pattern is a whole-word alternation of every entry returned by ``Kotlin.keywords()``.

    :return: A compiled regex pattern to match Kotlin keywords.
    :rtype: re.Pattern
    """
    alternation = '|'.join(Kotlin.keywords())
    return re.compile(r'\b(' + alternation + r')\b')

@staticmethod
def delimiters_regex() -> re.Pattern:
"""
Compile and return a regular expression pattern to identify Kotlin language delimiters.
This function generates a regular expression that matches Kotlin language delimiters, which include parentheses `()`, brackets `[]`, braces `{}`, commas `,`, colons `:`, periods `.`, semicolons `;`, angle brackets `<`, `>`, and the question mark `?`.
:return: A compiled regex pattern to match Kotlin delimiters.
:rtype: re.Pattern
"""
return re.compile(r'[()\[\]{}.,:;<>?]')

@staticmethod
def boolean_regex() -> re.Pattern:
"""
Compile and return a regular expression pattern to identify Kotlin boolean literals.
This function generates a regular expression that matches the Kotlin boolean literals `true`, `false`, and the special constant `null`.
:return: A compiled regex pattern to match Kotlin boolean literals and `null`.
:rtype: re.Pattern
"""
return re.compile(r'\b(?:true|false|null)\b')

@staticmethod
def remove_comments(source_code: str, isList: bool = False) -> "str | list[str]":
    """
    Remove comments from the provided Kotlin source code string.

    The source is tokenized with ``Kotlin.comment_regex()``; everything that pattern
    captures in its ``comment`` group is dropped and the ``noncomment`` text is kept.

    :param str source_code: The Kotlin source code from which to remove comments.
    :param bool isList: When ``True``, return the retained non-comment fragments as a list
        instead of a single joined string.
    :return: The stripped source with leading/trailing whitespace removed, or the list of
        retained fragments when ``isList`` is ``True``.
    :rtype: str or list
    """
    pattern = Kotlin.comment_regex()
    # Handle the list form first: an unconditional early return used to sit above this
    # branch, which made it unreachable and silently ignored ``isList``.
    if isList:
        return [
            match.group('noncomment')
            for match in pattern.finditer(source_code)
            if match.group('noncomment')
        ]
    return pattern.sub(lambda match: match.group('noncomment') or '', source_code).strip()

@staticmethod
def remove_keywords(source: str) -> str:
    """
    Remove every Kotlin keyword from the provided source string.

    Only the keyword text itself is deleted; surrounding whitespace is left untouched.

    :param str source: The Kotlin source code from which to remove keywords.
    :return: The source code with all whole-word keyword occurrences removed.
    :rtype: str
    """
    # keywords_regex() already returns a compiled pattern, so call sub() on it directly
    # rather than passing it through re.compile a second time.
    return Kotlin.keywords_regex().sub('', source)
52 changes: 42 additions & 10 deletions src/PyReprism/languages/rust.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,48 +2,80 @@
from PyReprism.utils import extension


class Rust:
    """Regular-expression helpers for tokenizing and stripping Rust source text."""

    def __init__(self):
        # ``self`` was missing from the signature, so instantiating the class raised TypeError.
        pass

    @staticmethod
    def file_extension() -> str:
        """
        Return the file extension registered for Rust.

        :return: The value of ``extension.rust``.
        :rtype: str
        """
        return extension.rust

    @staticmethod
    def keywords() -> list:
        """
        Return the list of words this module treats as Rust keywords.

        :return: A list of keyword strings.
        :rtype: list
        """
        return 'abstract|alignof|as|be|box|break|const|continue|crate|do|else|enum|extern|false|final|fn|for|if|impl|in|let|loop|match|mod|move|mut|offsetof|once|override|priv|pub|pure|ref|return|sizeof|static|self|struct|super|true|trait|type|typeof|unsafe|unsized|use|virtual|where|while|yield'.split('|')

    @staticmethod
    def comment_regex() -> re.Pattern:
        """
        Compile and return the tokenizer pattern used to separate Rust comments from code.

        Every match populates exactly one of two named groups:

        * ``comment`` -- a ``//`` line comment (this also covers ``///``), a ``/* ... */``
          block comment (may span lines), or an unterminated ``/*`` up to the end of its line.
        * ``noncomment`` -- a single- or double-quoted literal (so comment markers inside
          strings are preserved) or a run of ordinary code.

        :return: A compiled regex pattern with ``comment`` and ``noncomment`` groups.
        :rtype: re.Pattern
        """
        # The former ``^.*?\*/`` alternative was removed: with re.DOTALL it matched from the
        # start of a line across any amount of real code up to the first ``*/`` in the input,
        # so code preceding a block comment was discarded as if it were a comment.
        # The separate ``///.*?$`` alternative was unreachable behind ``//.*?$`` and is dropped.
        return re.compile(
            r'(?P<comment>//.*?$|/\*[\s\S]*?\*/|/\*.*?$)'
            r'|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"]*)',
            re.DOTALL | re.MULTILINE,
        )

    @staticmethod
    def number_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify Rust numeric literals.

        The pattern matches hexadecimal (``0x``), octal (``0o``), binary (``0b``) and decimal
        literals with optional ``_`` digit separators, an optional exponent and an optional
        ``i``/``u`` (8/16/32/64) or ``f32``/``f64`` suffix. The pattern contains one capturing
        group, so use ``finditer``/``group(0)`` to obtain the full literal.

        :return: A compiled regex pattern to match Rust numbers.
        :rtype: re.Pattern
        """
        return re.compile(r'\b(?:0x[\dA-Fa-f](?:_?[\dA-Fa-f])*|0o[0-7](?:_?[0-7])*|0b[01](?:_?[01])*|(\d(?:_?\d)*)?\.?\d(?:_?\d)*(?:[Ee][+-]?\d+)?)(?:_?(?:[iu](?:8|16|32|64)?|f32|f64))?\b')

    @staticmethod
    def operator_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify Rust operators.

        :return: A compiled regex pattern to match Rust operators.
        :rtype: re.Pattern
        """
        return re.compile(r'[-+*\/%!^]=?|=[=>]?|@|&[&=]?|\|[|=]?|<<?=?|>>?=?')

    @staticmethod
    def keywords_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify Rust keywords.

        The pattern is a whole-word alternation of every entry returned by ``Rust.keywords()``.

        :return: A compiled regex pattern to match Rust keywords.
        :rtype: re.Pattern
        """
        alternation = '|'.join(Rust.keywords())
        return re.compile(r'\b(' + alternation + r')\b')

    @staticmethod
    def boolean_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify Rust boolean literals.

        The pattern matches the whole words ``true`` and ``false`` and, in addition, the
        identifier ``None`` (the empty ``Option`` variant, grouped with the booleans here).

        :return: A compiled regex pattern to match ``true``, ``false`` and ``None``.
        :rtype: re.Pattern
        """
        return re.compile(r'\b(?:true|false|None)\b')

    @staticmethod
    def delimiters_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify Rust language delimiters.

        The pattern matches a single delimiter character: parentheses ``()``, brackets ``[]``,
        braces ``{}``, comma ``,``, colon ``:``, period ``.``, semicolon ``;``, angle brackets
        ``<`` ``>`` and the question mark ``?``.

        :return: A compiled regex pattern to match Rust delimiters.
        :rtype: re.Pattern
        """
        return re.compile(r'[()\[\]{}.,:;<>?]')

    @staticmethod
    def rust_delimiters_regex() -> re.Pattern:
        """
        Backward-compatible alias of :meth:`delimiters_regex`.

        :return: A compiled regex pattern to match Rust delimiters.
        :rtype: re.Pattern
        """
        return Rust.delimiters_regex()

    @staticmethod
    def remove_comments(source_code: str, isList: bool = False) -> "str | list[str]":
        """
        Remove comments from the provided Rust source code string.

        The source is tokenized with ``Rust.comment_regex()``; everything that pattern
        captures in its ``comment`` group is dropped and the ``noncomment`` text is kept.

        :param str source_code: The Rust source code from which to remove comments.
        :param bool isList: When ``True``, return the retained non-comment fragments as a
            list instead of a single joined string.
        :return: The stripped source with leading/trailing whitespace removed, or the list
            of retained fragments when ``isList`` is ``True``.
        :rtype: str or list
        """
        pattern = Rust.comment_regex()
        # Handle the list form first: an unconditional early return used to sit above this
        # branch, which made it unreachable and silently ignored ``isList``.
        if isList:
            return [
                match.group('noncomment')
                for match in pattern.finditer(source_code)
                if match.group('noncomment')
            ]
        return pattern.sub(lambda match: match.group('noncomment') or '', source_code).strip()

    @staticmethod
    def remove_keywords(source: str) -> str:
        """
        Remove every Rust keyword from the provided source string.

        Only the keyword text itself is deleted; surrounding whitespace is left untouched.

        :param str source: The Rust source code from which to remove keywords.
        :return: The source code with all whole-word keyword occurrences removed.
        :rtype: str
        """
        # keywords_regex() already returns a compiled pattern; no second re.compile needed.
        return Rust.keywords_regex().sub('', source)

0 comments on commit d812f3f

Please sign in to comment.