Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

version 2.10.2 #225

Merged
merged 2 commits into from
Dec 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@ out/
### API docs ###
**/src/main/resources/static/docs/*

**/ku-stack-firebase-adminsdk-87nwq-5ba04dfc12.json
**/ku-stack-firebase-adminsdk-87nwq-ae6a2df931.json
**/src/main/generated/
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ configurations.all {
}
}

test.onlyIf { System.getenv('DEPLOY_ENV') == 'dev' }
//test.onlyIf { System.getenv('DEPLOY_ENV') == 'dev' }

test {
jacoco {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package com.kustacks.kuring.common.utils.converter;

import java.util.Arrays;
import java.util.regex.Pattern;

/**
 * Normalizes free-form e-mail text into a single e-mail address.
 *
 * <p>The input may contain several addresses separated by {@code /} or {@code ,},
 * and an address may be obfuscated with the words "at" and "dot"
 * (for example {@code "hong at konkuk dot ac dot kr"}). Addresses that cannot be
 * turned into something matching {@link #EMAIL_PATTERN} are treated as empty.
 */
public class EmailSupporter {
    private static final Pattern AT_PATTERN = Pattern.compile("\\s+at\\s+");
    private static final Pattern DOT_PATTERN = Pattern.compile("\\s+dot\\s+");
    private static final Pattern EMAIL_PATTERN = Pattern.compile("^[a-zA-Z0-9_!#$%&'\\*+/=?{|}~^.-]+@[a-zA-Z0-9.-]+$");

    private static final String KONKUK_DOMAIN = "@konkuk.ac.kr";
    private static final String EMPTY_EMAIL = "";

    /**
     * Tells whether the given text carries no e-mail information at all.
     *
     * @param email raw text, may be {@code null}
     * @return {@code true} when {@code email} is {@code null} or contains only whitespace
     */
    public static boolean isNullOrBlank(String email) {
        return email == null || email.isBlank();
    }

    /**
     * Extracts one normalized e-mail address from raw text.
     *
     * <p>When several addresses are present, the first one ending with
     * {@code @konkuk.ac.kr} wins; otherwise the first address that could be
     * normalized is returned.
     *
     * @param email raw text, may be {@code null}
     * @return the selected address, or an empty string when none could be extracted
     */
    public static String convertValidEmail(String email) {
        if (isNullOrBlank(email)) {
            return EMPTY_EMAIL;
        }

        String[] normalizedEmails = normalizeEmails(splitEmails(email));

        // Among several e-mails prefer the konkuk one; otherwise take the first usable entry.
        return selectPreferredEmail(normalizedEmails);
    }

    /** Splits the raw text on the separators {@code /} and {@code ,}. */
    private static String[] splitEmails(String email) {
        return email.split("[/,]");
    }

    /** Normalizes every candidate independently, keeping the original order. */
    private static String[] normalizeEmails(String[] emailGroups) {
        return Arrays.stream(emailGroups)
                .map(EmailSupporter::normalizeEmail)
                .toArray(String[]::new);
    }

    /**
     * Turns one candidate into a valid address, or an empty string when that is not possible.
     */
    private static String normalizeEmail(String email) {
        // Separators are usually followed by a space ("a@x.com, b@y.com"); without
        // stripping, every address after the first would fail validation.
        String candidate = email.strip();

        if (EMAIL_PATTERN.matcher(candidate).matches()) {
            return candidate;
        }

        if (containsSubstitutePatterns(candidate)) {
            String replaced = replaceSubstitutePatterns(candidate);
            // Only accept the de-obfuscated text when it really is an address;
            // trailing notes or stray words must not leak into the result.
            return EMAIL_PATTERN.matcher(replaced).matches() ? replaced : EMPTY_EMAIL;
        }

        return EMPTY_EMAIL;
    }

    /** Replaces the words "dot" and "at" (surrounded by whitespace) with {@code .} and {@code @}. */
    private static String replaceSubstitutePatterns(String email) {
        // Reuse the precompiled patterns instead of recompiling through String.replaceAll.
        String withDots = DOT_PATTERN.matcher(email).replaceAll(".");
        return AT_PATTERN.matcher(withDots).replaceAll("@");
    }

    /** A candidate counts as obfuscated only when it contains both an "at" and a "dot" word. */
    private static boolean containsSubstitutePatterns(String email) {
        return DOT_PATTERN.matcher(email).find() && AT_PATTERN.matcher(email).find();
    }

    /**
     * Picks the Konkuk address when present, otherwise the first non-empty address.
     *
     * @param emails normalized candidates; entries that failed normalization are empty strings
     * @return the preferred address, or an empty string when every candidate is empty
     */
    private static String selectPreferredEmail(String[] emails) {
        return Arrays.stream(emails)
                .filter(email -> email.endsWith(KONKUK_DOMAIN))
                .findFirst()
                // Skip candidates that failed normalization so that a leading
                // invalid entry does not hide a later valid address.
                .orElseGet(() -> Arrays.stream(emails)
                        .filter(email -> !email.isEmpty())
                        .findFirst()
                        .orElse(EMPTY_EMAIL));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package com.kustacks.kuring.common.utils.converter;

import java.util.regex.Pattern;

/**
 * Normalizes phone text into the full {@code 02-XXX(X)-XXXX} form.
 *
 * <p>Three input shapes are recognized: an already complete number
 * ({@code 02-450-3114}), a bare four-digit extension ({@code 3114}), and a
 * number whose area code is closed by a parenthesis ({@code 02)450-3114}).
 * Anything else is converted to an empty string.
 */
public class PhoneNumberSupporter {

    private static final Pattern LAST_FOUR_NUMBER_PATTERN = Pattern.compile("\\d{4}");
    private static final Pattern FULL_NUMBER_PATTERN = Pattern.compile("02-\\d{3,4}-\\d{4}");
    private static final Pattern FULL_NUMBER_WITH_PARENTHESES_PATTERN = Pattern.compile("02[)]\\d{3,4}-\\d{4}");

    // Prefix prepended to a bare four-digit extension.
    // NOTE(review): presumably the university's main exchange number; confirm.
    private static final String EXTENSION_PREFIX = "02-450-";
    private static final String EMPTY_PHONE = "";

    /**
     * Tells whether the given text carries no phone information at all.
     *
     * @param number raw text, may be {@code null}
     * @return {@code true} when {@code number} is {@code null} or contains only whitespace
     */
    public static boolean isNullOrBlank(String number) {
        return number == null || number.isBlank();
    }

    /**
     * Converts raw phone text into a full number.
     *
     * @param number raw text, may be {@code null}; surrounding whitespace is ignored
     * @return the number in {@code 02-XXX(X)-XXXX} form, or an empty string when the
     *         input is blank or matches none of the recognized shapes
     */
    public static String convertFullExtensionNumber(String number) {
        if (isNullOrBlank(number)) {
            return EMPTY_PHONE;
        }

        // All patterns are matched against the whole text, so stray leading or
        // trailing whitespace would otherwise cause a valid number to be dropped.
        String candidate = number.strip();

        if (FULL_NUMBER_PATTERN.matcher(candidate).matches()) {
            return candidate;
        }
        if (isLastFourNumber(candidate)) {
            return EXTENSION_PREFIX + candidate;
        }
        if (isParenthesesNumber(candidate)) {
            // "02)450-3114" -> "02-450-3114"
            return candidate.replace(")", "-");
        }

        return EMPTY_PHONE;
    }

    /** True when the text is exactly four digits. */
    private static boolean isLastFourNumber(String number) {
        return LAST_FOUR_NUMBER_PATTERN.matcher(number).matches();
    }

    /** True when the text is a full number written as {@code 02)XXX(X)-XXXX}. */
    private static boolean isParenthesesNumber(String number) {
        return FULL_NUMBER_WITH_PARENTHESES_PATTERN.matcher(number).matches();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
import com.kustacks.kuring.staff.domain.Staff;
import org.springframework.data.jpa.repository.JpaRepository;

import java.util.List;

public interface StaffRepository extends JpaRepository<Staff, Long>, StaffQueryRepository {

List<Staff> findByDeptContaining(String deptName);
}
3 changes: 2 additions & 1 deletion src/main/java/com/kustacks/kuring/staff/domain/Email.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ public Email(String email) {
}

private boolean isValidEmail(String email) {
return !Objects.isNull(email) && patternMatches(email);
return Objects.nonNull(email) &&
(patternMatches(email) || Objects.equals(email,""));
}

private boolean patternMatches(String email) {
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/com/kustacks/kuring/staff/domain/Phone.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ public class Phone {
= Pattern.compile("(\\d{3,4})[-\\s]*(\\d{4})");
private static final String SEOUL_AREA_CODE = "02";
private static final String DELIMITER = "-";
private static final String EMPTY_NUMBER = "";

@Column(name = "phone", length = 64)
private String value;

public Phone(String phone) {
if(isEmptyNumbers(phone)) {
this.value = DELIMITER;
this.value = EMPTY_NUMBER;
return;
}

Expand Down Expand Up @@ -71,7 +72,7 @@ private boolean isValidNumbersAndSet(String phone) {
}

private static boolean isEmptyNumbers(String phone) {
return phone == null || phone.isBlank() || phone.equals(DELIMITER);
return phone == null || phone.isBlank();
}

public boolean isSameValue(String phone) {
Expand Down
18 changes: 16 additions & 2 deletions src/main/java/com/kustacks/kuring/staff/domain/Staff.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ public class Staff {
@Column(name = "lab", length = 64)
private String lab;

@Getter(AccessLevel.PUBLIC)
@Column(name = "position", length = 64)
private String position;

@Embedded
private Phone phone;

Expand All @@ -45,24 +49,26 @@ public class Staff {
private College college;

@Builder
private Staff(String name, String major, String lab, String phone, String email, String dept, String college) {
private Staff(String name, String major, String lab, String phone, String email, String dept, String college, String position) {
this.name = new Name(name);
this.major = major;
this.lab = lab;
this.phone = new Phone(phone);
this.email = new Email(email);
this.dept = dept;
this.college = College.valueOf(college);
this.position = position;
}

public void updateInformation(String name, String major, String lab, String phone, String email, String deptName, String college) {
public void updateInformation(String name, String major, String lab, String phone, String email, String deptName, String college, String position) {
this.name = new Name(name);
this.major = major;
this.lab = lab;
this.phone = new Phone(phone);
this.email = new Email(email);
this.dept = deptName;
this.college = College.valueOf(college);
this.position = position;
}

public String getEmail() {
Expand Down Expand Up @@ -105,6 +111,14 @@ public boolean isSameCollege(String collegeName) {
return this.college == College.valueOf(collegeName);
}

public boolean isSamePosition(String position) {
return this.position.equals(position);
}

public String identifier() {
return String.join(",", getName(), position, dept);
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package com.kustacks.kuring.worker.parser.staff;

import com.kustacks.kuring.worker.scrap.deptinfo.DeptInfo;
import com.kustacks.kuring.worker.scrap.deptinfo.art_design.CommunicationDesignDept;
import com.kustacks.kuring.worker.scrap.deptinfo.art_design.LivingDesignDept;
import com.kustacks.kuring.worker.scrap.deptinfo.real_estate.RealEstateDept;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
Expand All @@ -18,33 +16,22 @@ public class EachDeptStaffHtmlParser extends StaffHtmlParserTemplate {

@Override
public boolean support(DeptInfo deptInfo) {
return !(deptInfo instanceof RealEstateDept) &&
!(deptInfo instanceof LivingDesignDept) &&
!(deptInfo instanceof CommunicationDesignDept);
return !(deptInfo instanceof RealEstateDept);
}

protected Elements selectStaffInfoRows(Document document) {
Element table = document.select(".photo_intro").get(0);
return table.getElementsByTag("dl");
return document.select(".row");
}

protected String[] extractStaffInfoFromRow(Element row) {
Elements infos = row.getElementsByTag("dd");

// 교수명, 직위, 세부전공, 연구실, 연락처, 이메일 순으로 파싱
// 연구실, 연락처 정보는 없는 경우가 종종 있으므로, childNode접근 전 인덱스 체크하는 로직을 넣었음
String name = infos.get(0).getElementsByTag("span").get(1).text();

String jobPosition = String.valueOf(infos.get(1).childNodeSize() < 2 ? "" : infos.get(1).childNode(1));
if (jobPosition.contains("명예") || jobPosition.contains("대우") || jobPosition.contains("휴직") || !jobPosition.contains("교수")) {
log.info("스크래핑 스킵 -> {} 교수", name);
return new String[]{};
}

String major = infos.get(2).childNodeSize() < 2 ? "" : String.valueOf(infos.get(2).childNode(1));
String lab = infos.get(3).childNodeSize() < 2 ? "" : String.valueOf(infos.get(3).childNode(1));
String phone = infos.get(4).childNodeSize() < 2 ? "" : String.valueOf(infos.get(4).childNode(1));
String email = infos.get(5).getElementsByTag("a").get(0).text();
return new String[]{name, major, lab, phone, email};
String name = row.select(".info .title .name").text();

Elements detailElement = row.select(".detail");
String jobPosition = detailElement.select(".ico1 dd").text().trim();
String major = detailElement.select(".ico2 dd").text().trim();
String lab = detailElement.select(".ico3 dd").text().trim();
String extensionNumber = detailElement.select(".ico4 dd").text().trim();
String email = detailElement.select(".ico5 dd").text().trim();
return new String[]{name, jobPosition, major, lab, extensionNumber, email};
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,20 @@ public class RealEstateStaffHtmlParser extends StaffHtmlParserTemplate {
public boolean support(DeptInfo deptInfo) {
return deptInfo instanceof RealEstateDept;
}

protected Elements selectStaffInfoRows(Document document) {
Element table = document.select(".sub0201_list").get(0).getElementsByTag("ul").get(0);
return table.getElementsByTag("li");
return document.select(".row");
}

protected String[] extractStaffInfoFromRow(Element row) {
Element content = row.select(".con").get(0);

String name = content.select("dl > dt > a > strong").get(0).text();
String major = String.valueOf(content.select("dl > dd").get(0).childNode(4)).replaceFirst("\\s", "").trim();

Element textMore = content.select(".text_more").get(0);

String lab = String.valueOf(textMore.childNode(4)).split(":")[1].replaceFirst("\\s", "").trim();
String phone = String.valueOf(textMore.childNode(6)).split(":")[1].replaceFirst("\\s", "").trim();
String email = textMore.getElementsByTag("a").get(0).text();
return new String[]{name, major, lab, phone, email};
String name = row.select(".info .title .name").text();

Elements detalTagElement = row.select(".detail");
String jobPosition = detalTagElement.select("dt:contains(직위) + dd").text();
String major = detalTagElement.select("dt:contains(연구분야) + dd").text().trim();
String lab = detalTagElement.select("dt:contains(연구실) + dd").text().trim();
String extensionNumber = detalTagElement.select("dt:contains(연락처) + dd").text().trim();
String email = detalTagElement.select("dt:contains(이메일) + dd").text().trim();
return new String[]{name, jobPosition, major, lab, extensionNumber, email};
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,11 @@ private static List<StaffDto> convertStaffDtos(DeptInfo deptInfo, List<String[]>
return parseResult.stream()
.map(oneStaffInfo -> StaffDto.builder()
.name(oneStaffInfo[0])
.major(oneStaffInfo[1])
.lab(oneStaffInfo[2])
.phone(oneStaffInfo[3])
.email(oneStaffInfo[4])
.position(oneStaffInfo[1])
.major(oneStaffInfo[2])
.lab(oneStaffInfo[3])
.phone(oneStaffInfo[4])
.email(oneStaffInfo[5])
.deptName(deptInfo.getDeptName())
.collegeName(deptInfo.getCollegeName()
).build()
Expand Down
Loading
Loading