본문 바로가기

Monew

S3에 백업한 뉴스 기사를 '복구'하는 기능

(S3에 저장된) 백업 데이터와 DB에 저장된 현재 데이터를 비교해 유실된 데이터를 새로 등록합니다.

 

 

article/backUp/aws/BackupKeyMaker.java

package com.codeit.monew.article.backUp.aws;

import java.time.LocalDate;

/**
 * Utility that builds the S3 object keys (paths) used for daily article backups.
 *
 * <p>Not instantiable; all members are static.
 */
public class BackupKeyMaker {
    /** Extension carried by a finished backup object. */
    private static final String FINAL_SUFFIX = ".jsonl.gz";

    private BackupKeyMaker() {}

    /**
     * Builds the final key of the backup object for one day.
     *
     * @param date day the backup belongs to
     * @return key such as {@code backups/articles/2025/09/16/articles-2025-09-16.jsonl.gz}
     */
    public static String keyFor(LocalDate date) {
        String year = String.format("%04d", date.getYear());
        String month = String.format("%02d", date.getMonthValue());
        String day = String.format("%02d", date.getDayOfMonth());

        String folder = "backups/articles/" + year + "/" + month + "/" + day + "/";
        String fileStem = "articles-" + year + "-" + month + "-" + day;
        return folder + fileStem + FINAL_SUFFIX;
    }

    /**
     * Builds the temporary upload key for one day: the final key with its
     * {@code .jsonl.gz} suffix swapped for {@code .<uuid>.part}.
     *
     * <p>Per the original author's note, the upload goes to this temporary key first; after a
     * successful upload it is copied to the final {@code .jsonl.gz} key and the temporary
     * object is deleted.
     *
     * @param date day the backup belongs to
     * @param uuid unique token that distinguishes this upload attempt
     * @return temporary key such as {@code .../articles-2025-09-16.<uuid>.part}
     */
    public static String tempKeyFor(LocalDate date, String uuid) {
        String finalKey = keyFor(date);
        // keyFor always ends with FINAL_SUFFIX, so trimming it is equivalent to replacing it.
        String stem = finalKey.substring(0, finalKey.length() - FINAL_SUFFIX.length());
        return stem + "." + uuid + ".part";
    }
}

 

 

 

article/backUp/aws/S3Config.java

package com.codeit.monew.article.backUp.aws;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;

/**
 * Spring configuration that exposes the AWS SDK v2 {@link S3Client} bean.
 */
@Configuration
public class S3Config {
    /** AWS region id taken from the {@code aws.s3.region} property (for example {@code ap-northeast-2}). */
    @Value("${aws.s3.region}")
    private String region;

    /**
     * Creates the S3 client for the configured region.
     *
     * <p>The region comes from configuration instead of a hard-coded constant, so the
     * {@code aws.s3.region} property actually takes effect.
     *
     * @return an {@link S3Client} bound to the configured region
     */
    @Bean
    public S3Client s3Client() {
        return S3Client.builder()
                .region(Region.of(region))
                .build();
    }
}

 

 

article/backUp/controller/ArticleRestoreController.java

package com.codeit.monew.article.backUp.controller;

import com.codeit.monew.article.backUp.dto.ArticleRestoreResultDto;
import com.codeit.monew.article.backUp.service.ArticleRestoreService;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

import java.util.List;

/**
 * REST endpoint that triggers restoring articles from backup for a date range.
 */
@RestController
@RequiredArgsConstructor
@RequestMapping("/api/articles")
public class ArticleRestoreController {
    private final ArticleRestoreService articleRestoreService;

    /**
     * Handles {@code GET /api/articles/restore} by delegating to the restore service.
     *
     * @param from start of the range, passed through to the service as received
     * @param to   end of the range, passed through to the service as received
     * @return 200 OK wrapping the list of results returned by the service
     */
    @GetMapping("/restore")
    public ResponseEntity<List<ArticleRestoreResultDto>> getArticleRestoreResults(
            @RequestParam("from") String from,
            @RequestParam("to") String to
    ) {
        return ResponseEntity.ok(articleRestoreService.restoreArticle(from, to));
    }
}

 

 

article/backUp/dto/ArticleRestoreResultDto.java

package com.codeit.monew.article.backUp.dto;

import java.time.LocalDateTime;
import java.util.List;
import java.util.UUID;

/**
 * Result of one restore pass.
 *
 * @param restoreDate          timestamp recorded for the restore (the original note marks it as
 *                             a string in the API contract)
 * @param restoredArticleIds   ids of the articles that were restored
 * @param restoredArticleCount number of restored articles (the original note marks it as int64)
 */
public record ArticleRestoreResultDto(
        LocalDateTime restoreDate,
        List<UUID> restoredArticleIds,
        long restoredArticleCount
) {}

 

 

 

article/backUp/scheduler/BackupScheduler.java

package com.codeit.monew.article.backUp.scheduler;

import com.codeit.monew.article.backUp.service.ArticleBackupService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import java.time.LocalDate;
import java.time.ZoneId;


/**
 * Schedules the daily article backup.
 */
@Component
@RequiredArgsConstructor
@Slf4j
public class BackupScheduler {
    /** Time zone used both for the cron trigger and for deciding which day to back up. */
    private static final String ZONE = "Asia/Seoul";

    private final ArticleBackupService backupService;

    /**
     * Backs up the previous day's articles every day at 00:00 (Asia/Seoul).
     *
     * <p>Cron fields are: second, minute, hour, day of month, month, day of week.
     * The earlier expression {@code 0 * * * * *} fired every minute and targeted the current
     * day, which contradicted the documented intent (daily at midnight, previous day's data).
     */
    @Scheduled(cron = "0 0 0 * * *", zone = ZONE)
    public void backupArticle() {
        // The job runs just after midnight, so the completed day is "yesterday".
        LocalDate target = LocalDate.now(ZoneId.of(ZONE)).minusDays(1);
        log.info("뉴스기사 백업 시작, {}", target);
        backupService.articleBackup(target);
    }
}

 

 

 

article/backUp/service/ArticleRestoreService.java

package com.codeit.monew.article.backUp.service;

import com.codeit.monew.article.backUp.aws.BackupKeyMaker;
import com.codeit.monew.article.backUp.dto.ArticleBackupDto;
import com.codeit.monew.article.backUp.dto.ArticleRestoreResultDto;
import com.codeit.monew.article.entity.Article;
import com.codeit.monew.article.repository.ArticleRepository;
import com.codeit.monew.interest.entity.Interest;
import com.codeit.monew.interest.repository.InterestRepository;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
import software.amazon.awssdk.services.s3.model.NoSuchKeyException;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.zip.GZIPInputStream;


@Service
@RequiredArgsConstructor
@Slf4j
public class ArticleRestoreService {

//    private static final int BATCH_SIZE = 1000;

    private final ArticleRepository articleRepository;
    private final InterestRepository interestRepository;
    private final S3Client s3;

    @Value("${app.backup.bucket}")
    String bucket;

    private final ObjectMapper objectMapper = new ObjectMapper()
            .registerModule(new JavaTimeModule())
            .disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);

    // from, to가 들어오면 날짜로 바꿔서 일단위로 복구하기(백업을 일단위로 했기 때문에)
    @Transactional
    public List<ArticleRestoreResultDto> restoreArticle(String fromStr, String toStr) {
        LocalDate from = parseToLocalDate(fromStr); // 시작점
        LocalDate to = parseToLocalDate(toStr); // 끝
        List<ArticleRestoreResultDto> results = new ArrayList<>();

        // 끝이 시작점보다 크면 예외 발생
        if(to.isBefore(from)) {
            throw new IllegalArgumentException("값이 잘못되었습니다. to는 from보다 커야합니다.");
        }

        long totalInserted = 0;
        List<String> details = new ArrayList<>();

        for(LocalDate d = from; !d.isAfter(to); d = d.plusDays(1)) {
            List<Article> targetArticles = restoreOneDate(d);
            List<UUID> restoredIds = new ArrayList<>();

            for(Article article : targetArticles) {
                Optional<Article> checkArticle = articleRepository.findById(article.getId());
                // Article 중복검사, Deleted가 false인 애들만 필터링

                if(checkArticle.isPresent()) { // article이 있을 때
                    if (article.isDeleted()) { // true이면, 지워진거니까 deleted를 false로 바꾸기
                        checkArticle.get().setDeleted(false);
                        articleRepository.save(checkArticle.get());
                        restoredIds.add(article.getId());
                    }
                } else { // checkArticle에서 Article이 없을 때, checkArticle.isPresent() = false이면 (hard Delete)
                    Article newArticle = Article.builder()
                            .source(article.getSource())
                            .sourceUrl(article.getSourceUrl())
                            .articleTitle(article.getArticleTitle())
                            .articlePublishDate(article.getArticlePublishDate())
                            .articleSummary(article.getArticleSummary())
                            .articleCommentCount(0)
                            .articleViewCount(0)
                            .deleted(false)
                            .interest(article.getInterest())
                            .build();
//                    article.setDeleted(false);
                    articleRepository.save(newArticle); // 이러면 null값을 추가하는것이다
                    restoredIds.add(newArticle.getId());
                }
            }
            results.add(new ArticleRestoreResultDto(
                    LocalDateTime.now(),
                    restoredIds,
                    (long) restoredIds.size()
            ));
        }
        return results;
    }

    /**
     * 아래는 도우미 메서드
     */

    private boolean isArticleDuplicated(Article article) {
        Optional<Article> articleOptional = articleRepository.findBySourceUrl(article.getSourceUrl()); // 없어야 됨
        if (articleOptional.isPresent()) {
            return true;
        }
        return false;
    }

    private long flushBatch(List<Article> batch) {
        long n = articleRepository.saveAll(batch).size();
        batch.clear();
        return n;
    }

//    private Interest resolveInterest(String interestId) { // interest에서 UUID인 id를 찾고 String으로 반환해줌
//        if(interestId == null || interestId.isBlank()) {
//            return null;
//        }
//
//        try {
//            return interestRepository.findById(UUID.fromString(interestId)).orElse(null);
//        } catch(IllegalArgumentException e) {
//            return null;
//        }
//    }

    @Transactional
    public List<Article> restoreOneDate(LocalDate date) {
        String key = BackupKeyMaker.keyFor(date);
//        long inserted = 0;
        List<Article> result = new ArrayList<>();

        try(var s3is = s3.getObject(GetObjectRequest.builder().bucket(bucket).key(key).build()); // 키를 가져옴
            var gis = new GZIPInputStream(s3is); // 스트림
            var br = new BufferedReader(new InputStreamReader(gis, StandardCharsets.UTF_8)) // 버퍼
                ) {
//            List<Article> batch = new ArrayList<>(BATCH_SIZE);
            String line;

            HeadObjectResponse head = s3.headObject(b -> b.bucket(bucket).key(key));
            log.info("S3 object size={} bytes, key={}", head.contentLength(), key);

            log.info("br 메세지를 찾기 위한 더미 텍스트 : {}", br.readLine());
            while ((line = br.readLine()) != null) {
                ArticleBackupDto dto = objectMapper.readValue(line,ArticleBackupDto.class);
                log.info("dtoURL: {}", dto.sourceUrl());

                Optional<Interest> interest = interestRepository.findById(dto.interestId());
                if(interest.isEmpty()) { // 관심사가 이미 지워진 상태일 땐, 복구하지 않는다
                    continue;
                }
                // 엔티티 채워 넣기
                Article entity = Article.builder()
                        .id(dto.id())
                        .createdAt(dto.createdAt())
                        .source(dto.source())
                        .sourceUrl(dto.sourceUrl())
                        .articleTitle(dto.articleTitle())
                        .articlePublishDate(dto.articlePublishDate())
                        .articleSummary(dto.articleSummary())
                        .articleCommentCount(dto.articleCommentCount())
                        .articleViewCount(dto.articleViewCount())
                        .deleted(dto.deleted())
                        .interest(interest.get())
                        .build();

                result.add(entity);
            }
            log.info("Restore {}: inserted={}", date, result);
            return result;

        } catch (NoSuchKeyException e) {
            log.warn("No backup file for {} at s3://{}/{}", date, bucket, key);
            return result;
        } catch (Exception e) {
            log.error("Restore {} failed", date, e);
            throw new RuntimeException(e);
        }
    }

    private static LocalDate parseToLocalDate(String dateStr) {
        ZoneId KST = ZoneId.of("Asia/Seoul");
        try {
            return OffsetDateTime.parse(dateStr).atZoneSameInstant(KST).toLocalDate();
        } catch (Exception e) {}
        try {
            return LocalDateTime.parse(dateStr).atZone(KST).toLocalDate();
        } catch(Exception e) {}
        try {
            return LocalDate.parse(dateStr);
        } catch(Exception e) {}
        throw new IllegalArgumentException("DateTime 찾을 수 없음" + dateStr);
    }

}

 

 

 

article/repository/ArticleRepository.java

package com.codeit.monew.article.repository;

import com.codeit.monew.article.entity.Article;
import com.codeit.monew.interest.entity.Interest;

import java.time.LocalDateTime;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.stereotype.Repository;

/**
 * Spring Data JPA repository for {@link Article} entities keyed by {@link UUID}.
 * Custom queries come from {@link ArticleRepositoryCustom}.
 */
@Repository
public interface ArticleRepository extends JpaRepository<Article, UUID>, ArticleRepositoryCustom {

  // ---- Lookups -------------------------------------------------------------

  /** Finds the article with the given source URL, if any. */
  Optional<Article> findBySourceUrl(String sourceUrl);

  /** Finds the single article of the given interest that has the latest publish date. */
  Optional<Article> findTop1ByInterestOrderByArticlePublishDateDesc(Interest interest);

  // ---- Source statistics ---------------------------------------------------

  /** Lists every distinct {@code source} value present in the article table. */
  @Query("SELECT DISTINCT a.source FROM Article a")
  List<String> findDistinctSources();

  /** Counts the articles that have the given source. */
  int countBySource(String source);

  // ---- Queries for article restore ----------------------------------------

  /** Tells whether an article with the given source URL exists. */
  boolean existsBySourceUrl(String sourceUrl);

  /** Finds the articles whose publish date lies between the two given date-times. */
  List<Article> findAllByArticlePublishDateBetween(LocalDateTime dayStart, LocalDateTime dayEnd);
}

 

'Monew' 카테고리의 다른 글

개인 개발 리포트  (1) 2025.09.22
CD(지속적 배포) 작성  (0) 2025.09.19
S3 버킷에 데이터를 '백업'하는 로직  (0) 2025.09.16
CI(지속적 통합) 작성  (0) 2025.09.12
build.gradle  (0) 2025.09.04