From 22a721d3a9f71a85007c60088d0e042280b877cc Mon Sep 17 00:00:00 2001 From: Kimsehwan96 Date: Tue, 4 May 2021 03:47:58 +0900 Subject: [PATCH 1/4] refactor pyjosa's architecture with classes --- pyjosa/jonsung.py | 35 ++++++++++++++++++----------------- pyjosa/josa.py | 32 +++++++++++++++++--------------- pyjosa/test.py | 6 +++--- 3 files changed, 38 insertions(+), 35 deletions(-) diff --git a/pyjosa/jonsung.py b/pyjosa/jonsung.py index b19f244..5f02e31 100644 --- a/pyjosa/jonsung.py +++ b/pyjosa/jonsung.py @@ -2,24 +2,25 @@ import re from pyjosa.exceptions import NotHangleException -START_HANGLE = 44032 -J_IDX = 28 +class Jongsung: + # we will not instantiate this class because it's not really needed. + START_HANGLE = 44032 + J_IDX = 28 -def is_hangle(string: str) -> bool: - last_char = string[-1] - if re.match('.*[ㄱ-ㅎㅏ-ㅣ가-힣]+.*', last_char) is None: - return False - return True - - -def has_jongsung(string: str) -> bool: - if not is_hangle(string): - raise NotHangleException - - last_char = string[-1] - if (ord(last_char) - START_HANGLE) % J_IDX > 0: + @staticmethod + def is_hangle(self, string: str) -> bool: + last_char = string[-1] + if re.match('.*[ㄱ-ㅎㅏ-ㅣ가-힣]+.*', last_char) is None: + return False return True - return False -# TODO: can we make above functions as Decorator? + @classmethod + def has_jongsung(cls, string: str) -> bool: + if not cls.is_hangle(string): + raise NotHangleException + + last_char = string[-1] + if (ord(last_char) - cls.START_HANGLE) % cls.J_IDX > 0: + return True + return False diff --git a/pyjosa/josa.py b/pyjosa/josa.py index eafef0e..3fdbb93 100644 --- a/pyjosa/josa.py +++ b/pyjosa/josa.py @@ -1,40 +1,42 @@ -from pyjosa.jonsung import has_jongsung +from pyjosa.jonsung import Jongsung from pyjosa.exceptions import JosaTypeException class Josa: @staticmethod - def get_josa(string, josa) -> str: + def get_josa(string:str, josa:str) -> str: if (josa == '을') or (josa == '를'): - return '을' if has_jongsung(string) else '를' + return '을' if Jongsung.has_jongsung(string) else '를' elif (josa == '은') or (josa == '는'): - return '은' if has_jongsung(string) else '는' + return '은' if Jongsung.has_jongsung(string) else '는' elif (josa == '이') or (josa == '가'): - return '이' if has_jongsung(string) else '가' + return '이' if Jongsung.has_jongsung(string) else '가' elif (josa == '과') or (josa == '와'): - return '과' if has_jongsung(string) else '와' + return '과' if Jongsung.has_jongsung(string) else '와' elif (josa == '이나') or (josa == '나'): - return '이나' if has_jongsung(string) else '나' + return '이나' if Jongsung.has_jongsung(string) else '나' elif (josa == '으로') or (josa == '로'): - return '으로' if has_jongsung(string) else '로' + return '으로' if Jongsung.has_jongsung(string) else '로' else: raise JosaTypeException @staticmethod - def get_full_string(string, josa) -> str: + def get_full_string(string: str, josa: str) -> str: if (josa == '을') or (josa == '를'): - return string + '을' if has_jongsung(string) else string + '를' + return string + '을' if Jongsung.has_jongsung(string) else string + '를' elif (josa == '은') or (josa == '는'): - return string + '은' if has_jongsung(string) else string + '는' + return string + '은' if Jongsung.has_jongsung(string) else string + '는' elif (josa == '이') or (josa == '가'): - return string + '이' if has_jongsung(string) else string + '가' + return string + '이' if Jongsung.has_jongsung(string) else string + '가' elif (josa == '과') or (josa == '와'): - return string + '과' if has_jongsung(string) else string + '와' + return string + '과' if Jongsung.has_jongsung(string) else string + '와' elif (josa == '이나') or (josa == '나'): - return string + '이나' if has_jongsung(string) else string + '나' + return string + '이나' if Jongsung.has_jongsung(string) else string + '나' elif (josa == '으로') or (josa == '로'): - return string + '으로' if has_jongsung(string) else string + '로' + return string + '으로' if Jongsung.has_jongsung(string) else string + '로' else: raise JosaTypeException + +# TODO : Refactor pyjosa's architecture with oop. \ No newline at end of file diff --git a/pyjosa/test.py b/pyjosa/test.py index dd02a5d..579909b 100644 --- a/pyjosa/test.py +++ b/pyjosa/test.py @@ -1,4 +1,4 @@ -from pyjosa.jonsung import is_hangle, has_jongsung +from pyjosa.jonsung import Jongsung import unittest @@ -21,10 +21,10 @@ def setUp(self): def test_line_count(self): for v in self.kor: - self.assertTrue(is_hangle(v)) + self.assertTrue(Jongsung.is_hangle(v)) for v in self.not_kor: - self.assertFalse(is_hangle(v)) + self.assertFalse(Jongsung.is_hangle(v)) if __name__ == '__main__': From f65aa8d5a27fdea6423bdbf107dae1c6bcd89abf Mon Sep 17 00:00:00 2001 From: Kimsehwan96 Date: Tue, 4 May 2021 03:49:41 +0900 Subject: [PATCH 2/4] add todo comment --- pyjosa/josa.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyjosa/josa.py b/pyjosa/josa.py index 3fdbb93..0b88c42 100644 --- a/pyjosa/josa.py +++ b/pyjosa/josa.py @@ -39,4 +39,5 @@ def get_full_string(string: str, josa: str) -> str: else: raise JosaTypeException -# TODO : Refactor pyjosa's architecture with oop. \ No newline at end of file +# TODO : Refactor pyjosa's architecture with oop. +# TODO : need to remove duplicated fuxxing codes with 'if ... elif...' (refactor) From 77a5c5036914b8469beb6d61a23f271403c3f4f8 Mon Sep 17 00:00:00 2001 From: kimsehwan96 Date: Mon, 5 Jul 2021 21:29:39 +0900 Subject: [PATCH 3/4] =?UTF-8?q?:sparkles:=20=EC=9D=B4=EB=A6=84=20=EB=92=A4?= =?UTF-8?q?=EC=97=90=20=EC=98=A4=EB=8A=94=20=EC=A1=B0=EC=82=AC=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80=20('=EC=9D=B4=EA=B0=80')?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 ++++++ docs/2_EXAMPLE/example1.md | 6 ++++++ docs/2_EXAMPLE/example2.md | 4 ++-- docs/README.md | 6 ++++++ pyjosa/exceptions.py | 6 +++++- pyjosa/jonsung.py | 6 ++++-- pyjosa/josa.py | 6 +++++- setup.py | 2 +- 8 files changed, 35 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index cf0a592..c84d07f 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,9 @@ print(Josa.get_josa("닭", "는")) # 은 print(Josa.get_josa("산", "으로")) # 으로 print(Josa.get_josa("명예", "과")) # 와 print(Josa.get_josa("물", "나")) # 이나 +# 사람 이름 + 이가/가 를 구분하기 위해서는 조사부분에 '이가'를 입력합니다. +print(Josa.get_josa("예나", "이가")) # 가 +print(Josa.get_josa("세환", "이가")) # 이가 print(Josa.get_full_string("철수", "은")) # 철수는 print(Josa.get_full_string("오리", "을")) # 오리를 @@ -39,6 +42,9 @@ print(Josa.get_full_string("닭", "는")) # 닭은 print(Josa.get_full_string("산", "으로")) # 산으로 print(Josa.get_full_string("명예", "과")) # 명예와 print(Josa.get_full_string("물", "나")) # 물이나 +# 사람 이름 + 이가/가 를 구분하기 위해서는 조사부분에 '이가'를 입력합니다. +print(Josa.get_josa("예나", "이가")) # 예나가 +print(Josa.get_josa("세환", "이가")) # 세환이가 ``` diff --git a/docs/2_EXAMPLE/example1.md b/docs/2_EXAMPLE/example1.md index 53e1cbd..37db282 100644 --- a/docs/2_EXAMPLE/example1.md +++ b/docs/2_EXAMPLE/example1.md @@ -9,6 +9,9 @@ print(Josa.get_josa("닭", "는")) # 은 print(Josa.get_josa("산", "으로")) # 으로 print(Josa.get_josa("명예", "과")) # 와 print(Josa.get_josa("물", "나")) # 이나 +# 사람 이름 + 이가/가 를 구분하기 위해서는 조사부분에 '이가'를 입력합니다. +print(Josa.get_josa("예나", "이가")) # 가 +print(Josa.get_josa("세환", "이가")) # 이가 print(Josa.get_full_string("철수", "은")) # 철수는 print(Josa.get_full_string("오리", "을")) # 오리를 @@ -16,4 +19,7 @@ print(Josa.get_full_string("닭", "는")) # 닭은 print(Josa.get_full_string("산", "으로")) # 산으로 print(Josa.get_full_string("명예", "과")) # 명예와 print(Josa.get_full_string("물", "나")) # 물이나 +# 사람 이름 + 이가/가 를 구분하기 위해서는 조사부분에 '이가'를 입력합니다. +print(Josa.get_josa("예나", "이가")) # 예나가 +print(Josa.get_josa("세환", "이가")) # 세환이가 ``` \ No newline at end of file diff --git a/docs/2_EXAMPLE/example2.md b/docs/2_EXAMPLE/example2.md index 1be0573..2e60b28 100644 --- a/docs/2_EXAMPLE/example2.md +++ b/docs/2_EXAMPLE/example2.md @@ -6,7 +6,7 @@ from pyjosa.josa import Josa subject = '철수' obj = '산' -full_string = f'{Josa.get_full_string(subject, '은')} {Josa.get_full_string(obj, '를')} 간다' +full_string = f'{Josa.get_full_string(subject, "은")} {Josa.get_full_string(obj, "를")} 간다' print(full_string) # 철수는 산을 오른다 ``` @@ -18,6 +18,6 @@ subjects = ['철수', '세환', '길동'] obj = ['산', '바다', '집'] for i, v in enumerate(subjects): - print(f'{Josa.get_full_string(v, '은')} {Josa.get_full_string(obj[i], '를')} 간다') + print(f'{Josa.get_full_string(v, "은")} {Josa.get_full_string(obj[i], "를")} 간다') ``` \ No newline at end of file diff --git a/docs/README.md b/docs/README.md index 04873b2..390752f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -33,6 +33,9 @@ print(Josa.get_josa("닭", "는")) # 은 print(Josa.get_josa("산", "으로")) # 으로 print(Josa.get_josa("명예", "과")) # 와 print(Josa.get_josa("물", "나")) # 이나 +# 사람 이름 + 이가/가 를 구분하기 위해서는 조사부분에 '이가'를 입력합니다. +print(Josa.get_josa("예나", "이가")) # 가 +print(Josa.get_josa("세환", "이가")) # 이가 print(Josa.get_full_string("철수", "은")) # 철수는 print(Josa.get_full_string("오리", "을")) # 오리를 @@ -40,6 +43,9 @@ print(Josa.get_full_string("닭", "는")) # 닭은 print(Josa.get_full_string("산", "으로")) # 산으로 print(Josa.get_full_string("명예", "과")) # 명예와 print(Josa.get_full_string("물", "나")) # 물이나 +# 사람 이름 + 이가/가 를 구분하기 위해서는 조사부분에 '이가'를 입력합니다. +print(Josa.get_josa("예나", "이가")) # 예나가 +print(Josa.get_josa("세환", "이가")) # 세환이가 ``` ## 깃허브 주소 diff --git a/pyjosa/exceptions.py b/pyjosa/exceptions.py index 854b39e..576f5a9 100644 --- a/pyjosa/exceptions.py +++ b/pyjosa/exceptions.py @@ -4,4 +4,8 @@ def __init__(self): class JosaTypeException(Exception): def __init__(self): - super().__init__("메서드의 인자로 주어진 조사가 올바르지 않습니다.") \ No newline at end of file + super().__init__("메서드의 인자로 주어진 조사가 올바르지 않습니다.") + +class JongsungInstantiationException(Exception): + def __init__(self): + super().__init__("종성 클래스는 인스턴스화 할 필요가 없습니다.") \ No newline at end of file diff --git a/pyjosa/jonsung.py b/pyjosa/jonsung.py index 5f02e31..047bce9 100644 --- a/pyjosa/jonsung.py +++ b/pyjosa/jonsung.py @@ -1,15 +1,17 @@ #-*- coding: utf-8 -*- import re -from pyjosa.exceptions import NotHangleException +from pyjosa.exceptions import NotHangleException, JongsungInstantiationException class Jongsung: # we will not instantiate this class because it's not really needed. + def __init__(self): + raise JongsungInstantiationException START_HANGLE = 44032 J_IDX = 28 @staticmethod - def is_hangle(self, string: str) -> bool: + def is_hangle(string: str) -> bool: last_char = string[-1] if re.match('.*[ㄱ-ㅎㅏ-ㅣ가-힣]+.*', last_char) is None: return False diff --git a/pyjosa/josa.py b/pyjosa/josa.py index 0b88c42..418d67c 100644 --- a/pyjosa/josa.py +++ b/pyjosa/josa.py @@ -18,6 +18,8 @@ def get_josa(string:str, josa:str) -> str: return '이나' if Jongsung.has_jongsung(string) else '나' elif (josa == '으로') or (josa == '로'): return '으로' if Jongsung.has_jongsung(string) else '로' + elif josa == '이가': + return '이가' if Jongsung.has_jongsung(string) else '가' else: raise JosaTypeException @@ -36,8 +38,10 @@ def get_full_string(string: str, josa: str) -> str: return string + '이나' if Jongsung.has_jongsung(string) else string + '나' elif (josa == '으로') or (josa == '로'): return string + '으로' if Jongsung.has_jongsung(string) else string + '로' + elif josa == '이가': + return string + '이가' if Jongsung.has_jongsung(string) else string + '가' else: raise JosaTypeException # TODO : Refactor pyjosa's architecture with oop. -# TODO : need to remove duplicated fuxxing codes with 'if ... elif...' (refactor) +# TODO : need to remove duplicated codes with 'if ... elif...' (refactor) diff --git a/setup.py b/setup.py index b7729d0..7881ac8 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name = 'pyjosa', - version = '1.0.0', + version = '1.0.1', description = '한국어 조사 처리 패키지', author = 'sehwan.kim', author_email = 'sehwan.kim@ingkle.com', From db5fd7ca4be5a20bd25f84c10c68d46fa7b00924 Mon Sep 17 00:00:00 2001 From: kimsehwan96 Date: Mon, 5 Jul 2021 21:29:55 +0900 Subject: [PATCH 4/4] :memo: update document --- docs/Gemfile.lock | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 8c2d0b1..a5ca9a4 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -1,13 +1,13 @@ GEM remote: https://rubygems.org/ specs: - activesupport (6.0.3.5) + activesupport (6.0.4) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 0.7, < 2) minitest (~> 5.1) tzinfo (~> 1.1) zeitwerk (~> 2.2, >= 2.2.2) - addressable (2.7.0) + addressable (2.8.0) public_suffix (>= 2.0.2, < 5.0) coffee-script (2.4.1) coffee-script-source @@ -16,22 +16,34 @@ GEM colorator (1.1.0) commonmarker (0.17.13) ruby-enum (~> 0.5) - concurrent-ruby (1.1.8) - dnsruby (1.61.5) + concurrent-ruby (1.1.9) + dnsruby (1.61.7) simpleidn (~> 0.1) em-websocket (0.5.2) eventmachine (>= 0.12.9) http_parser.rb (~> 0.6.0) - ethon (0.12.0) - ffi (>= 1.3.0) + ethon (0.14.0) + ffi (>= 1.15.0) eventmachine (1.2.7) - execjs (2.7.0) - faraday (1.3.0) + execjs (2.8.1) + faraday (1.5.0) + faraday-em_http (~> 1.0) + faraday-em_synchrony (~> 1.0) + faraday-excon (~> 1.1) + faraday-httpclient (~> 1.0.1) faraday-net_http (~> 1.0) + faraday-net_http_persistent (~> 1.1) + faraday-patron (~> 1.0) multipart-post (>= 1.2, < 3) - ruby2_keywords + ruby2_keywords (>= 0.0.4) + faraday-em_http (1.0.0) + faraday-em_synchrony (1.0.0) + faraday-excon (1.1.0) + faraday-httpclient (1.0.1) faraday-net_http (1.0.1) - ffi (1.15.0) + faraday-net_http_persistent (1.1.0) + faraday-patron (1.0.0) + ffi (1.15.3) forwardable-extended (2.6.0) gemoji (3.0.1) github-pages (209) @@ -203,7 +215,7 @@ GEM kramdown-parser-gfm (1.1.0) kramdown (~> 2.0) liquid (4.0.3) - listen (3.4.1) + listen (3.5.1) rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) mercenary (0.3.6) @@ -213,24 +225,24 @@ GEM jekyll-seo-tag (~> 2.1) minitest (5.14.4) multipart-post (2.1.1) - nokogiri (1.11.2-x86_64-darwin) + nokogiri (1.11.7-x86_64-darwin) racc (~> 1.4) - octokit (4.20.0) + octokit (4.21.0) faraday (>= 0.9) sawyer (~> 0.8.0, >= 0.5.3) pathutil (0.16.2) forwardable-extended (~> 2.6) public_suffix (3.1.1) racc (1.5.2) - rb-fsevent (0.10.4) + rb-fsevent (0.11.0) rb-inotify (0.10.1) ffi (~> 1.0) - rexml (3.2.4) + rexml (3.2.5) rouge (3.23.0) ruby-enum (0.9.0) i18n ruby2_keywords (0.0.4) - rubyzip (2.3.0) + rubyzip (2.3.1) safe_yaml (1.0.5) sass (3.7.4) sass-listen (~> 4.0.0) @@ -263,4 +275,4 @@ DEPENDENCIES jekyll-rtd-theme (~> 2.0.6) BUNDLED WITH - 2.2.14 + 2.2.21