Skip to content

Commit

Permalink
Merge pull request #225 from ku-ring/develop
Browse files Browse the repository at this point in the history
version 2.10.2
  • Loading branch information
rlagkswn00 authored Dec 29, 2024
2 parents 7ebe9d4 + 595d7d4 commit 997f1e1
Show file tree
Hide file tree
Showing 132 changed files with 7,110 additions and 14,765 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@ out/
### API docs ###
**/src/main/resources/static/docs/*

**/ku-stack-firebase-adminsdk-87nwq-5ba04dfc12.json
**/ku-stack-firebase-adminsdk-87nwq-ae6a2df931.json
**/src/main/generated/
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ configurations.all {
}
}

test.onlyIf { System.getenv('DEPLOY_ENV') == 'dev' }
//test.onlyIf { System.getenv('DEPLOY_ENV') == 'dev' }

test {
jacoco {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package com.kustacks.kuring.common.utils.converter;

import java.util.Arrays;
import java.util.regex.Pattern;

/**
 * Normalizes raw e-mail text into a single address.
 *
 * <p>The raw text may hold several addresses separated by {@code /} or {@code ,}, and an
 * address may be obfuscated with the words "at" and "dot" surrounded by whitespace
 * (for example {@code "user at konkuk dot ac dot kr"}). Every candidate is trimmed,
 * de-obfuscated when necessary and validated; anything that still does not look like an
 * address is discarded.
 */
public class EmailSupporter {
    private static final Pattern AT_PATTERN = Pattern.compile("\\s+at\\s+");
    private static final Pattern DOT_PATTERN = Pattern.compile("\\s+dot\\s+");
    private static final Pattern EMAIL_PATTERN = Pattern.compile("^[a-zA-Z0-9_!#$%&'\\*+/=?{|}~^.-]+@[a-zA-Z0-9.-]+$");
    private static final Pattern EMAIL_DELIMITER_PATTERN = Pattern.compile("[/,]");

    private static final String KONKUK_DOMAIN = "@konkuk.ac.kr";
    private static final String EMPTY_EMAIL = "";

    /**
     * Tells whether the given text carries no e-mail information at all.
     *
     * @param email raw text, may be {@code null}
     * @return {@code true} when {@code email} is {@code null} or contains only whitespace
     */
    public static boolean isNullOrBlank(String email) {
        return email == null || email.isBlank();
    }

    /**
     * Extracts one valid address from the raw text.
     *
     * <p>When several valid addresses are present, the first one ending with
     * {@code @konkuk.ac.kr} wins; otherwise the first valid address is returned.
     *
     * @param email raw text, may be {@code null}
     * @return the selected address, or an empty string when no valid address is found
     */
    public static String convertValidEmail(String email) {
        if (isNullOrBlank(email)) {
            return EMPTY_EMAIL;
        }

        String[] emailGroups = splitEmails(email);
        String[] normalizedEmails = normalizeEmails(emailGroups);

        // Prefer a Konkuk address among the candidates, otherwise take the first valid one.
        return selectPreferredEmail(normalizedEmails);
    }

    /** Splits the raw text on the {@code /} and {@code ,} delimiters. */
    private static String[] splitEmails(String email) {
        return EMAIL_DELIMITER_PATTERN.split(email);
    }

    /** Normalizes every candidate; invalid candidates become empty strings. */
    private static String[] normalizeEmails(String[] emailGroups) {
        return Arrays.stream(emailGroups)
                .map(EmailSupporter::normalizeEmail)
                .toArray(String[]::new);
    }

    /**
     * Turns one candidate into a valid address, or an empty string when that is not possible.
     */
    private static String normalizeEmail(String email) {
        // Delimiters are usually followed by a space ("a@x.com, b@y.com"); without
        // trimming, every candidate after the first would fail validation.
        String candidate = email.strip();

        if (isValidEmail(candidate)) {
            return candidate;
        }

        if (containsSubstitutePatterns(candidate)) {
            String substituted = replaceSubstitutePatterns(candidate);
            // Substitution alone does not guarantee a well-formed address, so re-validate.
            return isValidEmail(substituted) ? substituted : EMPTY_EMAIL;
        }

        return EMPTY_EMAIL;
    }

    /** Checks the candidate against the accepted address format. */
    private static boolean isValidEmail(String email) {
        return EMAIL_PATTERN.matcher(email).matches();
    }

    /** Replaces the obfuscation words "dot" and "at" with {@code .} and {@code @}. */
    private static String replaceSubstitutePatterns(String email) {
        // Reuse the precompiled patterns instead of recompiling them via String.replaceAll.
        String withDots = DOT_PATTERN.matcher(email).replaceAll(".");
        return AT_PATTERN.matcher(withDots).replaceAll("@");
    }

    /** An obfuscated address must contain both the "dot" and the "at" word. */
    private static boolean containsSubstitutePatterns(String email) {
        return DOT_PATTERN.matcher(email).find() && AT_PATTERN.matcher(email).find();
    }

    /**
     * Picks the first Konkuk address; falls back to the first non-empty candidate, and to
     * an empty string when every candidate was discarded.
     */
    private static String selectPreferredEmail(String[] emails) {
        return Arrays.stream(emails)
                .filter(email -> email.endsWith(KONKUK_DOMAIN))
                .findFirst()
                .orElseGet(() -> Arrays.stream(emails)
                        .filter(email -> !email.isEmpty())
                        .findFirst()
                        .orElse(EMPTY_EMAIL));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package com.kustacks.kuring.common.utils.converter;

import java.util.regex.Pattern;

/**
 * Normalizes raw phone text into the {@code 02-XXX(X)-XXXX} form.
 *
 * <p>Three input shapes are recognized: an already complete number
 * ({@code 02-450-3114}), a bare four-digit extension ({@code 3114}) and a number whose
 * area code is closed with a parenthesis ({@code 02)450-3114}). Anything else is treated
 * as "no phone number".
 */
public class PhoneNumberSupporter {

    private static final Pattern LAST_FOUR_NUMBER_PATTERN = Pattern.compile("\\d{4}");
    private static final Pattern FULL_NUMBER_PATTERN = Pattern.compile("02-\\d{3,4}-\\d{4}");
    private static final Pattern FULL_NUMBER_WITH_PARENTHESES_PATTERN = Pattern.compile("02[)]\\d{3,4}-\\d{4}");

    // Prefix prepended to a bare four-digit extension.
    private static final String EXTENSION_PREFIX = "02-450-";
    private static final String EMPTY_PHONE = "";

    /**
     * Tells whether the given text carries no phone information at all.
     *
     * @param number raw text, may be {@code null}
     * @return {@code true} when {@code number} is {@code null} or contains only whitespace
     */
    public static boolean isNullOrBlank(String number) {
        return number == null || number.isBlank();
    }

    /**
     * Converts the raw text into a full phone number.
     *
     * @param number raw text, may be {@code null}; surrounding whitespace is ignored
     * @return the number as {@code 02-XXX(X)-XXXX}, or an empty string when the text is
     *         blank or matches none of the recognized shapes
     */
    public static String convertFullExtensionNumber(String number) {
        if (isNullOrBlank(number)) {
            return EMPTY_PHONE;
        }

        // All checks below are whole-string matches, so padding must be removed first;
        // otherwise " 3114 " would be silently dropped.
        String candidate = number.strip();

        if (FULL_NUMBER_PATTERN.matcher(candidate).matches()) {
            return candidate;
        }
        if (containsLastFourNumber(candidate)) {
            return EXTENSION_PREFIX + candidate;
        }
        if (containsParenthesesPattern(candidate)) {
            return candidate.replace(")", "-");
        }

        return EMPTY_PHONE;
    }

    /** True when the text is exactly a four-digit extension. */
    private static boolean containsLastFourNumber(String number) {
        return LAST_FOUR_NUMBER_PATTERN.matcher(number).matches();
    }

    /** True when the text is exactly a full number written as {@code 02)XXX(X)-XXXX}. */
    private static boolean containsParenthesesPattern(String number) {
        return FULL_NUMBER_WITH_PARENTHESES_PATTERN.matcher(number).matches();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
import com.kustacks.kuring.staff.domain.Staff;
import org.springframework.data.jpa.repository.JpaRepository;

import java.util.List;

public interface StaffRepository extends JpaRepository<Staff, Long>, StaffQueryRepository {

List<Staff> findByDeptContaining(String deptName);
}
3 changes: 2 additions & 1 deletion src/main/java/com/kustacks/kuring/staff/domain/Email.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ public Email(String email) {
}

private boolean isValidEmail(String email) {
return !Objects.isNull(email) && patternMatches(email);
return Objects.nonNull(email) &&
(patternMatches(email) || Objects.equals(email,""));
}

private boolean patternMatches(String email) {
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/com/kustacks/kuring/staff/domain/Phone.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ public class Phone {
= Pattern.compile("(\\d{3,4})[-\\s]*(\\d{4})");
private static final String SEOUL_AREA_CODE = "02";
private static final String DELIMITER = "-";
private static final String EMPTY_NUMBER = "";

@Column(name = "phone", length = 64)
private String value;

public Phone(String phone) {
if(isEmptyNumbers(phone)) {
this.value = DELIMITER;
this.value = EMPTY_NUMBER;
return;
}

Expand Down Expand Up @@ -71,7 +72,7 @@ private boolean isValidNumbersAndSet(String phone) {
}

private static boolean isEmptyNumbers(String phone) {
return phone == null || phone.isBlank() || phone.equals(DELIMITER);
return phone == null || phone.isBlank();
}

public boolean isSameValue(String phone) {
Expand Down
18 changes: 16 additions & 2 deletions src/main/java/com/kustacks/kuring/staff/domain/Staff.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ public class Staff {
@Column(name = "lab", length = 64)
private String lab;

@Getter(AccessLevel.PUBLIC)
@Column(name = "position", length = 64)
private String position;

@Embedded
private Phone phone;

Expand All @@ -45,24 +49,26 @@ public class Staff {
private College college;

@Builder
private Staff(String name, String major, String lab, String phone, String email, String dept, String college) {
private Staff(String name, String major, String lab, String phone, String email, String dept, String college, String position) {
this.name = new Name(name);
this.major = major;
this.lab = lab;
this.phone = new Phone(phone);
this.email = new Email(email);
this.dept = dept;
this.college = College.valueOf(college);
this.position = position;
}

public void updateInformation(String name, String major, String lab, String phone, String email, String deptName, String college) {
public void updateInformation(String name, String major, String lab, String phone, String email, String deptName, String college, String position) {
this.name = new Name(name);
this.major = major;
this.lab = lab;
this.phone = new Phone(phone);
this.email = new Email(email);
this.dept = deptName;
this.college = College.valueOf(college);
this.position = position;
}

public String getEmail() {
Expand Down Expand Up @@ -105,6 +111,14 @@ public boolean isSameCollege(String collegeName) {
return this.college == College.valueOf(collegeName);
}

/**
 * Compares this staff member's position with the given one.
 *
 * <p>The comparison is null-safe: the position field is assigned straight from the
 * constructor/update argument without validation, so it may be {@code null}.
 *
 * @param position position text to compare against, may be {@code null}
 * @return {@code true} when both positions are equal, or both are {@code null}
 */
public boolean isSamePosition(String position) {
    return java.util.Objects.equals(this.position, position);
}

/**
 * Builds a key for this staff member from name, position and department.
 *
 * @return the three values joined by commas, in the order name, position, department
 */
public String identifier() {
    return getName() + "," + position + "," + dept;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package com.kustacks.kuring.worker.parser.staff;

import com.kustacks.kuring.worker.scrap.deptinfo.DeptInfo;
import com.kustacks.kuring.worker.scrap.deptinfo.art_design.CommunicationDesignDept;
import com.kustacks.kuring.worker.scrap.deptinfo.art_design.LivingDesignDept;
import com.kustacks.kuring.worker.scrap.deptinfo.real_estate.RealEstateDept;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
Expand All @@ -18,33 +16,22 @@ public class EachDeptStaffHtmlParser extends StaffHtmlParserTemplate {

@Override
public boolean support(DeptInfo deptInfo) {
return !(deptInfo instanceof RealEstateDept) &&
!(deptInfo instanceof LivingDesignDept) &&
!(deptInfo instanceof CommunicationDesignDept);
return !(deptInfo instanceof RealEstateDept);
}

protected Elements selectStaffInfoRows(Document document) {
Element table = document.select(".photo_intro").get(0);
return table.getElementsByTag("dl");
return document.select(".row");
}

protected String[] extractStaffInfoFromRow(Element row) {
Elements infos = row.getElementsByTag("dd");

// 교수명, 직위, 세부전공, 연구실, 연락처, 이메일 순으로 파싱
// 연구실, 연락처 정보는 없는 경우가 종종 있으므로, childNode접근 전 인덱스 체크하는 로직을 넣었음
String name = infos.get(0).getElementsByTag("span").get(1).text();

String jobPosition = String.valueOf(infos.get(1).childNodeSize() < 2 ? "" : infos.get(1).childNode(1));
if (jobPosition.contains("명예") || jobPosition.contains("대우") || jobPosition.contains("휴직") || !jobPosition.contains("교수")) {
log.info("스크래핑 스킵 -> {} 교수", name);
return new String[]{};
}

String major = infos.get(2).childNodeSize() < 2 ? "" : String.valueOf(infos.get(2).childNode(1));
String lab = infos.get(3).childNodeSize() < 2 ? "" : String.valueOf(infos.get(3).childNode(1));
String phone = infos.get(4).childNodeSize() < 2 ? "" : String.valueOf(infos.get(4).childNode(1));
String email = infos.get(5).getElementsByTag("a").get(0).text();
return new String[]{name, major, lab, phone, email};
String name = row.select(".info .title .name").text();

Elements detailElement = row.select(".detail");
String jobPosition = detailElement.select(".ico1 dd").text().trim();
String major = detailElement.select(".ico2 dd").text().trim();
String lab = detailElement.select(".ico3 dd").text().trim();
String extensionNumber = detailElement.select(".ico4 dd").text().trim();
String email = detailElement.select(".ico5 dd").text().trim();
return new String[]{name, jobPosition, major, lab, extensionNumber, email};
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,20 @@ public class RealEstateStaffHtmlParser extends StaffHtmlParserTemplate {
public boolean support(DeptInfo deptInfo) {
return deptInfo instanceof RealEstateDept;
}

protected Elements selectStaffInfoRows(Document document) {
Element table = document.select(".sub0201_list").get(0).getElementsByTag("ul").get(0);
return table.getElementsByTag("li");
return document.select(".row");
}

protected String[] extractStaffInfoFromRow(Element row) {
Element content = row.select(".con").get(0);

String name = content.select("dl > dt > a > strong").get(0).text();
String major = String.valueOf(content.select("dl > dd").get(0).childNode(4)).replaceFirst("\\s", "").trim();

Element textMore = content.select(".text_more").get(0);

String lab = String.valueOf(textMore.childNode(4)).split(":")[1].replaceFirst("\\s", "").trim();
String phone = String.valueOf(textMore.childNode(6)).split(":")[1].replaceFirst("\\s", "").trim();
String email = textMore.getElementsByTag("a").get(0).text();
return new String[]{name, major, lab, phone, email};
String name = row.select(".info .title .name").text();

Elements detalTagElement = row.select(".detail");
String jobPosition = detalTagElement.select("dt:contains(직위) + dd").text();
String major = detalTagElement.select("dt:contains(연구분야) + dd").text().trim();
String lab = detalTagElement.select("dt:contains(연구실) + dd").text().trim();
String extensionNumber = detalTagElement.select("dt:contains(연락처) + dd").text().trim();
String email = detalTagElement.select("dt:contains(이메일) + dd").text().trim();
return new String[]{name, jobPosition, major, lab, extensionNumber, email};
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,11 @@ private static List<StaffDto> convertStaffDtos(DeptInfo deptInfo, List<String[]>
return parseResult.stream()
.map(oneStaffInfo -> StaffDto.builder()
.name(oneStaffInfo[0])
.major(oneStaffInfo[1])
.lab(oneStaffInfo[2])
.phone(oneStaffInfo[3])
.email(oneStaffInfo[4])
.position(oneStaffInfo[1])
.major(oneStaffInfo[2])
.lab(oneStaffInfo[3])
.phone(oneStaffInfo[4])
.email(oneStaffInfo[5])
.deptName(deptInfo.getDeptName())
.collegeName(deptInfo.getCollegeName()
).build()
Expand Down
Loading

0 comments on commit 997f1e1

Please sign in to comment.