From 42ac6729ba33834a5a337e37a589da0c460c4b5f Mon Sep 17 00:00:00 2001 From: Inah Jeon Date: Wed, 8 Jan 2025 21:02:51 +0900 Subject: [PATCH] [SAAS-511] Update Model Names and Remove Deprecated Models (#40) * rename solar models * remove deprecated solar docvision model * remove deprecated layout analysis model * change version --- libs/upstage/README.md | 8 +- libs/upstage/langchain_upstage/__init__.py | 4 - libs/upstage/langchain_upstage/chat_models.py | 11 +- .../langchain_upstage/layout_analysis.py | 259 ----------- .../layout_analysis_parsers.py | 425 ------------------ .../tools/groundedness_check.py | 2 +- libs/upstage/pyproject.toml | 2 +- .../test_chat_models_standard.py | 2 +- .../integration_tests/test_layout_analysis.py | 104 ----- .../tests/unit_tests/test_chat_models.py | 8 +- .../unit_tests/test_chat_models_standard.py | 2 +- libs/upstage/tests/unit_tests/test_imports.py | 2 - .../tests/unit_tests/test_layout_analysis.py | 247 ---------- 13 files changed, 17 insertions(+), 1059 deletions(-) delete mode 100644 libs/upstage/langchain_upstage/layout_analysis.py delete mode 100644 libs/upstage/langchain_upstage/layout_analysis_parsers.py delete mode 100644 libs/upstage/tests/integration_tests/test_layout_analysis.py delete mode 100644 libs/upstage/tests/unit_tests/test_layout_analysis.py diff --git a/libs/upstage/README.md b/libs/upstage/README.md index 8f95ced..4dcd7a4 100644 --- a/libs/upstage/README.md +++ b/libs/upstage/README.md @@ -24,17 +24,17 @@ See a [usage example](https://python.langchain.com/docs/integrations/text_embedd Use `solar-embedding-1-large` model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name. `UpstageEmbeddings` will automatically add the suffixes based on the method called. -## Layout Analysis Loader +## Document Parse Loader See a [usage example](https://python.langchain.com/v0.1/docs/integrations/document_loaders/upstage/) -The `use_ocr` option determines whether OCR will be used for text extraction from documents. If this option is not specified, the default policy of the [Upstage Layout Analysis API](https://developers.upstage.ai/docs/apis/layout-analysis#request-body) service will be applied. When `use_ocr` is set to `True`, OCR is utilized to extract text. In the case of PDF documents, this involves converting the PDF into images before performing OCR. Conversely, if `use_ocr` is set to `False` for PDF documents, the text information embedded within the PDF is used directly. However, if the input document is not a PDF, such as an image, setting `use_ocr` to `False` will result in an error. +The `use_ocr` option determines whether OCR will be used for text extraction from documents. If this option is not specified, the default policy of the [Upstage Document Parse API](https://console.upstage.ai/docs/capabilities/document-parse#request) service will be applied. When `use_ocr` is set to `True`, OCR is utilized to extract text. In the case of PDF documents, this involves converting the PDF into images before performing OCR. Conversely, if `use_ocr` is set to `False` for PDF documents, the text information embedded within the PDF is used directly. However, if the input document is not a PDF, such as an image, setting `use_ocr` to `False` will result in an error. 
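For instance, here is a minimal sketch of forcing OCR-based extraction, assuming `UpstageDocumentParseLoader` accepts the `use_ocr` keyword described above (the file path is a placeholder):

```python
from langchain_upstage import UpstageDocumentParseLoader

# Requires the UPSTAGE_API_KEY environment variable to be set.
# Placeholder path; point this at a real PDF on your machine.
pdf_path = "/PATH/TO/YOUR/FILE.pdf"

# use_ocr=True renders PDF pages to images and extracts text with OCR;
# use_ocr=False reads the embedded PDF text layer directly and raises an
# error for non-PDF inputs such as images.
ocr_loader = UpstageDocumentParseLoader(pdf_path, split="page", use_ocr=True)

docs = ocr_loader.load()  # or ocr_loader.lazy_load() for page-by-page loading
```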
```python -from langchain_upstage import UpstageLayoutAnalysisLoader +from langchain_upstage import UpstageDocumentParseLoader file_path = "/PATH/TO/YOUR/FILE.image" -layzer = UpstageLayoutAnalysisLoader(file_path, split="page") +layzer = UpstageDocumentParseLoader(file_path, split="page") # For improved memory efficiency, consider using the lazy_load method to load documents page by page. docs = layzer.load() # or layzer.lazy_load() diff --git a/libs/upstage/langchain_upstage/__init__.py b/libs/upstage/langchain_upstage/__init__.py index 8e94219..b91aeb3 100644 --- a/libs/upstage/langchain_upstage/__init__.py +++ b/libs/upstage/langchain_upstage/__init__.py @@ -2,8 +2,6 @@ from langchain_upstage.document_parse import UpstageDocumentParseLoader from langchain_upstage.document_parse_parsers import UpstageDocumentParseParser from langchain_upstage.embeddings import UpstageEmbeddings -from langchain_upstage.layout_analysis import UpstageLayoutAnalysisLoader -from langchain_upstage.layout_analysis_parsers import UpstageLayoutAnalysisParser from langchain_upstage.tools.groundedness_check import ( GroundednessCheck, UpstageGroundednessCheck, @@ -12,10 +10,8 @@ __all__ = [ "ChatUpstage", "UpstageEmbeddings", - "UpstageLayoutAnalysisLoader", "UpstageDocumentParseLoader", "UpstageDocumentParseParser", - "UpstageLayoutAnalysisParser", "UpstageGroundednessCheck", "GroundednessCheck", ] diff --git a/libs/upstage/langchain_upstage/chat_models.py b/libs/upstage/langchain_upstage/chat_models.py index 5ad1a45..0f108e2 100644 --- a/libs/upstage/langchain_upstage/chat_models.py +++ b/libs/upstage/langchain_upstage/chat_models.py @@ -56,8 +56,7 @@ DOC_PARSING_MODEL = ["solar-pro"] SOLAR_TOKENIZERS = { "solar-pro": "upstage/solar-pro-tokenizer", - "solar-1-mini-chat": "upstage/solar-1-mini-tokenizer", - "solar-docvision": "upstage/solar-docvision-preview-tokenizer", + "solar-mini": "upstage/solar-1-mini-tokenizer", } @@ -105,7 +104,7 @@ def _get_ls_params( params["ls_provider"] = "upstage" return params - model_name: str = Field(default="solar-1-mini-chat", alias="model") + model_name: str = Field(default="solar-mini", alias="model") """Model name to use.""" upstage_api_key: SecretStr = Field( default_factory=secret_from_env( @@ -384,7 +383,7 @@ class AnswerWithJustification(BaseModel): justification: str - llm = ChatUpstage(model="solar-1-mini-chat", temperature=0) + llm = ChatUpstage(model="solar-mini", temperature=0) structured_llm = llm.with_structured_output(AnswerWithJustification) structured_llm.invoke( @@ -410,7 +409,7 @@ class AnswerWithJustification(BaseModel): justification: str - llm = ChatUpstage(model="solar-1-mini-chat", temperature=0) + llm = ChatUpstage(model="solar-mini", temperature=0) structured_llm = llm.with_structured_output( AnswerWithJustification, include_raw=True ) @@ -440,7 +439,7 @@ class AnswerWithJustification(BaseModel): dict_schema = convert_to_openai_tool(AnswerWithJustification) - llm = ChatUpstage(model="solar-1-mini-chat", temperature=0) + llm = ChatUpstage(model="solar-mini", temperature=0) structured_llm = llm.with_structured_output(dict_schema) structured_llm.invoke( diff --git a/libs/upstage/langchain_upstage/layout_analysis.py b/libs/upstage/langchain_upstage/layout_analysis.py deleted file mode 100644 index 2484c8b..0000000 --- a/libs/upstage/langchain_upstage/layout_analysis.py +++ /dev/null @@ -1,259 +0,0 @@ -import os -import warnings -from pathlib import Path -from typing import Any, Dict, Iterator, List, Literal, Optional, Union - -from 
langchain_core.document_loaders import BaseLoader, Blob -from langchain_core.documents import Document - -from .layout_analysis_parsers import UpstageLayoutAnalysisParser - -DEFAULT_PAGE_BATCH_SIZE = 10 - -OutputType = Literal["text", "html"] -SplitType = Literal["none", "element", "page"] - - -def validate_api_key(api_key: str) -> None: - """ - Validates the provided API key. - - Args: - api_key (str): The API key to be validated. - - Raises: - ValueError: If the API key is empty or None. - - Returns: - None - """ - if not api_key: - raise ValueError("API Key is required for Upstage Document Loader") - - -def validate_file_path(file_path: Union[str, Path, List[str], List[Path]]) -> None: - """ - Validates if a file exists at the given file path. - - Args: - file_path (Union[str, Path, List[str], List[Path]): The file path(s) to be - validated. - - Raises: - FileNotFoundError: If the file or any of the files in the list do not exist. - """ - if isinstance(file_path, list): - for path in file_path: - validate_file_path(path) - return - if not os.path.exists(file_path): - raise FileNotFoundError(f"File not found: {file_path}") - - -def get_from_param_or_env( - key: str, - param: Optional[str] = None, - env_key: Optional[str] = None, - default: Optional[str] = None, -) -> str: - """Get a value from a param or an environment variable.""" - if param is not None: - return param - elif env_key and env_key in os.environ and os.environ[env_key]: - return os.environ[env_key] - elif default is not None: - return default - else: - raise ValueError( - f"Did not find {key}, please add an environment variable" - f" `{env_key}` which contains it, or pass" - f" `{key}` as a named parameter." - ) - - -class UpstageLayoutAnalysisLoader(BaseLoader): - """Upstage Layout Analysis. - - To use, you should have the environment variable `UPSTAGE_API_KEY` - set with your API key or pass it as a named parameter to the constructor. - - Example: - .. code-block:: python - - from langchain_upstage import UpstageLayoutAnalysis - - file_path = "/PATH/TO/YOUR/FILE.pdf" - loader = UpstageLayoutAnalysis( - file_path, split="page", output_type="text" - ) - """ - - def __init__( - self, - file_path: Union[str, Path, List[str], List[Path]], - output_type: Union[OutputType, dict] = "html", - split: SplitType = "none", - api_key: Optional[str] = None, - use_ocr: Optional[bool] = None, - exclude: list = ["header", "footer"], - ): - """ - Initializes an instance of the Upstage document loader. - - Args: - file_path (Union[str, Path, List[str], List[Path]): The path to the document - to be loaded. - output_type (Union[OutputType, dict], optional): The type of output to be - generated by the parser. - Defaults to "html". - split (SplitType, optional): The type of splitting to be applied. - Defaults to "none" (no splitting). - api_key (str, optional): The API key for accessing the Upstage API. - Defaults to None, in which case it will be - fetched from the environment variable - `UPSTAGE_API_KEY`. - use_ocr (bool, optional): Extract text from images in the document using - OCR. If the value is True, OCR is used to extract - text from an image. If the value is False, text is - extracted from a PDF. (An error will occur if the - value is False and the input is NOT in PDF format) - The default value is None, and the default - behavior will be performed based on the API's - policy if no value is specified. Please check https://developers.upstage.ai/docs/apis/layout-analysis#request-body. 
- exclude (list, optional): Exclude specific elements from - the output. - Defaults to ["header", "footer"]. - """ - self.file_path = file_path - self.output_type = output_type - self.split = split - if deprecated_key := os.environ.get("UPSTAGE_DOCUMENT_AI_API_KEY"): - warnings.warn( - "UPSTAGE_DOCUMENT_AI_API_KEY is deprecated." - "Please use UPSTAGE_API_KEY instead." - ) - warnings.warn( - "UpstageLayoutAnalysisLoader is deprecated." - "Please use langchain_upstage.document_parse.UpstageDocumentParseLoader" - " instead." - ) - - self.api_key = get_from_param_or_env( - "UPSTAGE_API_KEY", api_key, "UPSTAGE_API_KEY", deprecated_key - ) - self.use_ocr = use_ocr - self.exclude = exclude - - validate_file_path(self.file_path) - validate_api_key(self.api_key) - - def load(self) -> List[Document]: - """ - Loads and parses the document using the UpstageLayoutAnalysisParser. - - Returns: - A list of Document objects representing the parsed layout analysis. - """ - - if isinstance(self.file_path, list): - result = [] - - for file_path in self.file_path: - blob = Blob.from_path(file_path) - - parser = UpstageLayoutAnalysisParser( - self.api_key, - split=self.split, - output_type=self.output_type, - use_ocr=self.use_ocr, - exclude=self.exclude, - ) - result.extend(list(parser.lazy_parse(blob, is_batch=True))) - - return result - - else: - blob = Blob.from_path(self.file_path) - - parser = UpstageLayoutAnalysisParser( - self.api_key, - split=self.split, - output_type=self.output_type, - use_ocr=self.use_ocr, - exclude=self.exclude, - ) - return list(parser.lazy_parse(blob, is_batch=True)) - - def lazy_load(self) -> Iterator[Document]: - """ - Lazily loads and parses the document using the UpstageLayoutAnalysisParser. - - Returns: - An iterator of Document objects representing the parsed layout analysis. - """ - - if isinstance(self.file_path, list): - for file_path in self.file_path: - blob = Blob.from_path(file_path) - - parser = UpstageLayoutAnalysisParser( - self.api_key, - split=self.split, - output_type=self.output_type, - use_ocr=self.use_ocr, - exclude=self.exclude, - ) - yield from parser.lazy_parse(blob, is_batch=True) - else: - blob = Blob.from_path(self.file_path) - - parser = UpstageLayoutAnalysisParser( - self.api_key, - split=self.split, - output_type=self.output_type, - use_ocr=self.use_ocr, - exclude=self.exclude, - ) - yield from parser.lazy_parse(blob) - - def merge_and_split( - self, documents: List[Document], splitter: Optional[object] = None - ) -> List[Document]: - """ - Merges the page content and metadata of multiple documents into a single - document, or splits the documents using a custom splitter. - - Args: - documents (list): A list of Document objects to be merged and split. - splitter (object, optional): An optional splitter object that implements the - `split_documents` method. If provided, the documents will be split using - this splitter. Defaults to None, in which case the documents are merged. - - Returns: - list: A list of Document objects. If no splitter is provided, a single - Document object is returned with the merged content and combined metadata. - If a splitter is provided, the documents are split and a list of Document - objects is returned. - - Raises: - AssertionError: If a splitter is provided but it does not implement the - `split_documents` method. 
- """ - if splitter is None: - merged_content = " ".join([doc.page_content for doc in documents]) - - metadatas: Dict[str, Any] = dict() - for _meta in [doc.metadata for doc in documents]: - for key, value in _meta.items(): - if key in metadatas: - metadatas[key].append(value) - else: - metadatas[key] = [value] - - return [Document(page_content=merged_content, metadata=metadatas)] - else: - assert hasattr( - splitter, "split_documents" - ), "splitter must implement split_documents method" - - return splitter.split_documents(documents) diff --git a/libs/upstage/langchain_upstage/layout_analysis_parsers.py b/libs/upstage/langchain_upstage/layout_analysis_parsers.py deleted file mode 100644 index 41f3ffe..0000000 --- a/libs/upstage/langchain_upstage/layout_analysis_parsers.py +++ /dev/null @@ -1,425 +0,0 @@ -import io -import json -import logging -import os -import warnings -from typing import Dict, Iterator, List, Literal, Optional, Union - -import requests -from langchain_core.document_loaders import BaseBlobParser, Blob -from langchain_core.documents import Document -from pypdf import PdfReader, PdfWriter -from pypdf.errors import PdfReadError - -# Disable logging for PyPDF -logger = logging.getLogger("pypdf") -logger.setLevel(logging.ERROR) - -LAYOUT_ANALYSIS_URL = "https://api.upstage.ai/v1/document-ai/layout-analysis" - -DEFAULT_NUMBER_OF_PAGE = 10 - -OutputType = Literal["text", "html"] -SplitType = Literal["none", "element", "page"] - - -def validate_api_key(api_key: str) -> None: - """ - Validates the provided API key. - - Args: - api_key (str): The API key to be validated. - - Raises: - ValueError: If the API key is empty or None. - - Returns: - None - """ - if not api_key: - raise ValueError("API Key is required for Upstage Document Loader") - - -def validate_file_path(file_path: str) -> None: - """ - Validates if a file exists at the given file path. - - Args: - file_path (str): The path to the file. - - Raises: - FileNotFoundError: If the file does not exist at the given file path. - """ - if not os.path.exists(file_path): - raise FileNotFoundError(f"File not found: {file_path}") - - -def parse_output(data: dict, output_type: Union[OutputType, dict]) -> str: - """ - Parse the output data based on the specified output type. - - Args: - data (dict): The data to be parsed. - output_type (Union[OutputType, dict]): The output type to parse the element data - into. - - Returns: - str: The parsed output. - - Raises: - ValueError: If the output type is invalid. - """ - if isinstance(output_type, dict): - if data["category"] in output_type: - return data[output_type[data["category"]]] - else: - return data["text"] - elif isinstance(output_type, str): - if output_type == "text": - return data["text"] - elif output_type == "html": - return data["html"] - else: - raise ValueError(f"Invalid output type: {output_type}") - else: - raise ValueError(f"Invalid output type: {output_type}") - - -def get_from_param_or_env( - key: str, - param: Optional[str] = None, - env_key: Optional[str] = None, - default: Optional[str] = None, -) -> str: - """Get a value from a param or an environment variable.""" - if param is not None: - return param - elif env_key and env_key in os.environ and os.environ[env_key]: - return os.environ[env_key] - elif default is not None: - return default - else: - raise ValueError( - f"Did not find {key}, please add an environment variable" - f" `{env_key}` which contains it, or pass" - f" `{key}` as a named parameter." 
- ) - - -class UpstageLayoutAnalysisParser(BaseBlobParser): - """Upstage Layout Analysis Parser. - - To use, you should have the environment variable `UPSTAGE_API_KEY` - set with your API key or pass it as a named parameter to the constructor. - - Example: - .. code-block:: python - - from langchain_upstage import UpstageLayoutAnalysisParser - - loader = UpstageLayoutAnalysisParser(split="page", output_type="text") - """ - - def __init__( - self, - api_key: Optional[str] = None, - output_type: Union[OutputType, dict] = "html", - split: SplitType = "none", - use_ocr: Optional[bool] = None, - exclude: list = [], - ): - """ - Initializes an instance of the Upstage class. - - Args: - api_key (str, optional): The API key for accessing the Upstage API. - Defaults to None, in which case it will be - fetched from the environment variable - `UPSTAGE_API_KEY`. - output_type (Union[OutputType, dict], optional): The type of output to be - generated by the parser. - Defaults to "html". - split (SplitType, optional): The type of splitting to be applied. - Defaults to "none" (no splitting). - use_ocr (bool, optional): Extract text from images in the document using - OCR. If the value is True, OCR is used to extract - text from an image. If the value is False, text is - extracted from a PDF. (An error will occur if the - value is False and the input is NOT in PDF format) - The default value is None, and the default - behavior will be performed based on the API's - policy if no value is specified. Please check https://developers.upstage.ai/docs/apis/layout-analysis#request-body. - exclude (list, optional): Exclude specific elements from the output. - Defaults to [] (all included). - """ - if deprecated_key := os.environ.get("UPSTAGE_DOCUMENT_AI_API_KEY"): - warnings.warn( - "UPSTAGE_DOCUMENT_AI_API_KEY is deprecated." - "Please use UPSTAGE_API_KEY instead." - ) - warnings.warn( - "UpstageLayoutAnalysisParser is deprecated." - "Please use" - " langchain_upstage.document_parse_parsers.UpstageDocumentParseParser" - " instead." - ) - - self.api_key = get_from_param_or_env( - "UPSTAGE_API_KEY", api_key, "UPSTAGE_API_KEY", deprecated_key - ) - - self.output_type = output_type - self.split = split - self.use_ocr = use_ocr - self.exclude = exclude - - validate_api_key(self.api_key) - - def _get_response(self, files: Dict) -> List: - """ - Sends a POST request to the API endpoint with the provided files and - returns the response. - - Args: - files (dict): A dictionary containing the files to be sent in the request. - - Returns: - dict: The JSON response from the API. - - Raises: - ValueError: If there is an error in the API call. 
- """ - try: - headers = {"Authorization": f"Bearer {self.api_key}"} - if self.use_ocr is not None: - options = {"ocr": self.use_ocr} - response = requests.post( - LAYOUT_ANALYSIS_URL, headers=headers, files=files, data=options - ) - else: - response = requests.post( - LAYOUT_ANALYSIS_URL, headers=headers, files=files - ) - response.raise_for_status() - - result = response.json().get("elements", []) - - elements = [ - element for element in result if element["category"] not in self.exclude - ] - - return elements - - except requests.RequestException as req_err: - # Handle any request-related exceptions - raise ValueError(f"Failed to send request: {req_err}") - except json.JSONDecodeError as json_err: - # Handle JSON decode errors - raise ValueError(f"Failed to decode JSON response: {json_err}") - except Exception as err: - # Handle any other exceptions - raise ValueError(f"An error occurred: {err}") - - return [] - - def _split_and_request( - self, - full_docs: PdfReader, - start_page: int, - num_pages: int = DEFAULT_NUMBER_OF_PAGE, - ) -> List: - """ - Splits the full pdf document into partial pages and sends a request to the - server. - - Args: - full_docs (PdfReader): The full document to be split and requested. - start_page (int): The starting page number for splitting the document. - num_pages (int, optional): The number of pages to split the document - into. - Defaults to DEFAULT_NUMBER_OF_PAGE. - - Returns: - response: The response from the server. - """ - merger = PdfWriter() - merger.append( - full_docs, - pages=(start_page, min(start_page + num_pages, full_docs.get_num_pages())), - ) - - with io.BytesIO() as buffer: - merger.write(buffer) - buffer.seek(0) - response = self._get_response({"document": buffer}) - - return response - - def _element_document(self, elements: Dict, start_page: int = 0) -> Document: - """ - Converts an elements into a Document object. - - Args: - elements (Dict) : The elements to convert. - start_page (int): The starting page number for splitting the document. - This number starts from zero. - - Returns: - A list containing a single Document object. - - """ - return Document( - page_content=(parse_output(elements, self.output_type)), - metadata={ - "page": elements["page"] + start_page, - "id": elements["id"], - "bounding_box": json.dumps(elements["bounding_box"]), - "category": elements["category"], - }, - ) - - def _page_document(self, elements: List, start_page: int = 0) -> List[Document]: - """ - Combines elements with the same page number into a single Document object. - - Args: - elements (List): A list of elements containing page numbers. - start_page (int): The starting page number for splitting the document. - This number starts from zero. - - Returns: - List[Document]: A list of Document objects, each representing a page - with its content and metadata. - """ - _docs = [] - pages = sorted(set(map(lambda x: x["page"], elements))) - - page_group = [ - [element for element in elements if element["page"] == x] for x in pages - ] - - for group in page_group: - page_content = " ".join( - [parse_output(element, self.output_type) for element in group] - ) - - _docs.append( - Document( - page_content=page_content, - metadata={ - "page": group[0]["page"] + start_page, - }, - ) - ) - - return _docs - - def lazy_parse(self, blob: Blob, is_batch: bool = False) -> Iterator[Document]: - """ - Lazily parses a document and yields Document objects based on the specified - split type. - - Args: - blob (Blob): The input document blob to parse. 
- is_batch (bool, optional): Whether to parse the document in batches. - Defaults to False (single page parsing) - - Yields: - Document: The parsed document object. - - Raises: - ValueError: If an invalid split type is provided. - - """ - - if is_batch: - num_pages = DEFAULT_NUMBER_OF_PAGE - else: - num_pages = 1 - - try: - full_docs = PdfReader(str(blob.path)) - number_of_pages = full_docs.get_num_pages() - is_pdf = True - except PdfReadError: - number_of_pages = 1 - is_pdf = False - except Exception as e: - raise ValueError(f"Failed to read PDF file: {e}") - - if self.split == "none": - if is_pdf: - result = "" - start_page = 0 - num_pages = DEFAULT_NUMBER_OF_PAGE - for _ in range(number_of_pages): - if start_page >= number_of_pages: - break - - elements = self._split_and_request(full_docs, start_page, num_pages) - for element in elements: - result += parse_output(element, self.output_type) - - start_page += num_pages - - else: - if not blob.path: - raise ValueError("Blob path is required for non-PDF files.") - - result = "" - with open(blob.path, "rb") as f: - elements = self._get_response({"document": f}) - - for element in elements: - result += parse_output(element, self.output_type) - - yield Document( - page_content=result, - metadata={ - "total_pages": number_of_pages, - }, - ) - - elif self.split == "element": - if is_pdf: - start_page = 0 - for _ in range(number_of_pages): - if start_page >= number_of_pages: - break - - elements = self._split_and_request(full_docs, start_page, num_pages) - for element in elements: - yield self._element_document(element, start_page) - - start_page += num_pages - - else: - if not blob.path: - raise ValueError("Blob path is required for non-PDF files.") - with open(blob.path, "rb") as f: - elements = self._get_response({"document": f}) - - for element in elements: - yield self._element_document(element) - - elif self.split == "page": - if is_pdf: - start_page = 0 - for _ in range(number_of_pages): - if start_page >= number_of_pages: - break - - elements = self._split_and_request(full_docs, start_page, num_pages) - yield from self._page_document(elements, start_page) - - start_page += num_pages - else: - if not blob.path: - raise ValueError("Blob path is required for non-PDF files.") - with open(blob.path, "rb") as f: - elements = self._get_response({"document": f}) - - yield from self._page_document(elements) - - else: - raise ValueError(f"Invalid split type: {self.split}") diff --git a/libs/upstage/langchain_upstage/tools/groundedness_check.py b/libs/upstage/langchain_upstage/tools/groundedness_check.py index 325e024..40b9840 100644 --- a/libs/upstage/langchain_upstage/tools/groundedness_check.py +++ b/libs/upstage/langchain_upstage/tools/groundedness_check.py @@ -72,7 +72,7 @@ def __init__(self, **kwargs: Any) -> None: raise ValueError("UPSTAGE_API_KEY must be set or passed") api_wrapper = ChatUpstage( - model="solar-1-mini-answer-verification", + model="groundedness-check", api_key=upstage_api_key.get_secret_value(), ) super().__init__(upstage_api_key=upstage_api_key, api_wrapper=api_wrapper) diff --git a/libs/upstage/pyproject.toml b/libs/upstage/pyproject.toml index b315ac6..0b4a36d 100644 --- a/libs/upstage/pyproject.toml +++ b/libs/upstage/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-upstage" -version = "0.4.0" +version = "0.5.0rc0" description = "An integration package connecting Upstage and LangChain" authors = [] readme = "README.md" diff --git a/libs/upstage/tests/integration_tests/test_chat_models_standard.py 
b/libs/upstage/tests/integration_tests/test_chat_models_standard.py index 6337c7b..7af3e9f 100644 --- a/libs/upstage/tests/integration_tests/test_chat_models_standard.py +++ b/libs/upstage/tests/integration_tests/test_chat_models_standard.py @@ -17,7 +17,7 @@ def chat_model_class(self) -> Type[BaseChatModel]: @property def chat_model_params(self) -> dict: return { - "model": "solar-1-mini-chat", + "model": "solar-mini", } @pytest.mark.xfail(reason="Not implemented.") diff --git a/libs/upstage/tests/integration_tests/test_layout_analysis.py b/libs/upstage/tests/integration_tests/test_layout_analysis.py deleted file mode 100644 index 07cfb8c..0000000 --- a/libs/upstage/tests/integration_tests/test_layout_analysis.py +++ /dev/null @@ -1,104 +0,0 @@ -"""Test Upstage layout analysis.""" - -from pathlib import Path -from typing import List, get_args - -from langchain_upstage import UpstageLayoutAnalysisLoader -from langchain_upstage.layout_analysis import OutputType, SplitType - -EXAMPLE_PDF_PATH = Path(__file__).parent.parent / "examples/solar.pdf" - - -def test_layout_analysis_param() -> None: - """Test layout analysis document loader initialization.""" - - for output_type in get_args(OutputType): - for split in get_args(SplitType): - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type=output_type, - split=split, - ) - assert loader.output_type == output_type - assert loader.split == split - assert loader.file_path == EXAMPLE_PDF_PATH - assert loader.exclude == ["header", "footer"] - - excludes: List[List[str]] = [[], ["header"], ["header", "footer"]] - for exclude in excludes: - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type=output_type, - split=split, - exclude=exclude, - ) - assert loader.output_type == output_type - assert loader.split == split - assert loader.file_path == EXAMPLE_PDF_PATH - assert loader.exclude == exclude - - -def test_file_not_found_error() -> None: - """Test layout analysis error handling.""" - - try: - UpstageLayoutAnalysisLoader( - file_path="./NOT_EXISTING_FILE.pdf", - ) - assert False - except FileNotFoundError: - assert True - - -def test_none_split() -> None: - """Test layout analysis with no split.""" - - for output_type in get_args(OutputType): - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type=output_type, - split="none", - ) - documents = loader.load() - - assert len(documents) == 1 - assert documents[0].page_content is not None - assert documents[0].metadata["total_pages"] == 1 - - -def test_element_split() -> None: - """Test layout analysis with element split.""" - - for output_type in get_args(OutputType): - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type=output_type, - split="element", - ) - documents = loader.load() - - assert len(documents) == 13 - for document in documents: - assert document.page_content is not None - assert document.metadata["page"] == 1 - assert document.metadata["id"] is not None - assert document.metadata["bounding_box"] is not None - assert isinstance(document.metadata["bounding_box"], str) - assert document.metadata["category"] is not None - - -def test_page_split() -> None: - """Test layout analysis with page split.""" - - for output_type in get_args(OutputType): - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type=output_type, - split="page", - ) - documents = loader.load() - - assert len(documents) == 1 - for document in documents: - assert document.page_content is 
not None - assert document.metadata["page"] == 1 diff --git a/libs/upstage/tests/unit_tests/test_chat_models.py b/libs/upstage/tests/unit_tests/test_chat_models.py index 4a8690c..213aaed 100644 --- a/libs/upstage/tests/unit_tests/test_chat_models.py +++ b/libs/upstage/tests/unit_tests/test_chat_models.py @@ -112,7 +112,7 @@ def mock_completion() -> dict: "id": "chatcmpl-7fcZavknQda3SQ", "object": "chat.completion", "created": 1689989000, - "model": "solar-1-mini-chat", + "model": "solar-mini", "choices": [ { "index": 0, @@ -248,12 +248,12 @@ def test_upstage_invoke_name(mock_completion: dict) -> None: def test_upstage_tokenizer() -> None: - llm = ChatUpstage(model="solar-1-mini-chat") + llm = ChatUpstage(model="solar-mini") llm._get_tokenizer() def test_upstage_tokenizer_get_num_tokens() -> None: - llm = ChatUpstage(model="solar-1-mini-chat") + llm = ChatUpstage(model="solar-mini") num_tokens = llm.get_num_tokens_from_messages([HumanMessage(content="Hello World")]) assert num_tokens == 12 @@ -279,4 +279,4 @@ def test_chat_upstage_extra_kwargs() -> None: # Test that "model" cannot be specified in kwargs with pytest.raises(ValueError): - ChatUpstage(model_kwargs={"model": "solar-1-mini-chat"}) + ChatUpstage(model_kwargs={"model": "solar-mini"}) diff --git a/libs/upstage/tests/unit_tests/test_chat_models_standard.py b/libs/upstage/tests/unit_tests/test_chat_models_standard.py index 89b7ece..038742d 100644 --- a/libs/upstage/tests/unit_tests/test_chat_models_standard.py +++ b/libs/upstage/tests/unit_tests/test_chat_models_standard.py @@ -16,7 +16,7 @@ def chat_model_class(self) -> Type[BaseChatModel]: @property def chat_model_params(self) -> dict: return { - "model": "solar-1-mini-chat", + "model": "solar-mini", } @property diff --git a/libs/upstage/tests/unit_tests/test_imports.py b/libs/upstage/tests/unit_tests/test_imports.py index 1d49335..a66d422 100644 --- a/libs/upstage/tests/unit_tests/test_imports.py +++ b/libs/upstage/tests/unit_tests/test_imports.py @@ -3,10 +3,8 @@ EXPECTED_ALL = [ "ChatUpstage", "UpstageEmbeddings", - "UpstageLayoutAnalysisLoader", "UpstageDocumentParseLoader", "UpstageDocumentParseParser", - "UpstageLayoutAnalysisParser", "UpstageGroundednessCheck", "GroundednessCheck", ] diff --git a/libs/upstage/tests/unit_tests/test_layout_analysis.py b/libs/upstage/tests/unit_tests/test_layout_analysis.py deleted file mode 100644 index a122b85..0000000 --- a/libs/upstage/tests/unit_tests/test_layout_analysis.py +++ /dev/null @@ -1,247 +0,0 @@ -import json -from pathlib import Path -from typing import Any, Dict, get_args -from unittest import TestCase -from unittest.mock import MagicMock, Mock, patch - -import requests - -from langchain_upstage import UpstageLayoutAnalysisLoader -from langchain_upstage.layout_analysis import OutputType, SplitType - -MOCK_RESPONSE_JSON: Dict[str, Any] = { - "api": "1.0", - "billed_pages": 1, - "elements": [ - { - "bounding_box": [ - {"x": 74, "y": 906}, - {"x": 148, "y": 906}, - {"x": 148, "y": 2338}, - {"x": 74, "y": 2338}, - ], - "category": "header", - "html": "
<header>arXiv:2103.15348v2</header>",
-            "id": 0,
-            "page": 1,
-            "text": "arXiv:2103.15348v2",
-        },
-        {
-            "bounding_box": [
-                {"x": 654, "y": 474},
-                {"x": 1912, "y": 474},
-                {"x": 1912, "y": 614},
-                {"x": 654, "y": 614},
-            ],
-            "category": "paragraph",
-            "html": "<p>LayoutParser Toolkit</p>",
-            "id": 1,
-            "page": 1,
-            "text": "LayoutParser Toolkit",
-        },
-    ],
-    "html": "<header>arXiv:2103.15348v2</header>"
-    + "<p>LayoutParser Toolkit</p>
", - "mimetype": "multipart/form-data", - "model": "layout-analyzer-0.1.0", - "text": "arXiv:2103.15348v2LayoutParser Toolkit", -} - -EXAMPLE_PDF_PATH = Path(__file__).parent.parent / "examples/solar.pdf" - - -def test_initialization() -> None: - """Test layout analysis document loader initialization.""" - UpstageLayoutAnalysisLoader(file_path=EXAMPLE_PDF_PATH, api_key="bar") - - -def test_layout_analysis_param() -> None: - for output_type in get_args(OutputType): - for split in get_args(SplitType): - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - api_key="bar", - output_type=output_type, - split=split, - exclude=[], - ) - assert loader.output_type == output_type - assert loader.split == split - assert loader.api_key == "bar" - assert loader.file_path == EXAMPLE_PDF_PATH - - -@patch("requests.post") -def test_none_split_text_output(mock_post: Mock) -> None: - mock_post.return_value = MagicMock( - status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON) - ) - - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type="text", - split="none", - api_key="valid_api_key", - exclude=[], - ) - documents = loader.load() - - assert len(documents) == 1 - assert documents[0].page_content == MOCK_RESPONSE_JSON["text"] - assert documents[0].metadata["total_pages"] == 1 - - -@patch("requests.post") -def test_element_split_text_output(mock_post: Mock) -> None: - mock_post.return_value = MagicMock( - status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON) - ) - - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type="text", - split="element", - api_key="valid_api_key", - exclude=[], - ) - documents = loader.load() - - assert len(documents) == 2 - - for i, document in enumerate(documents): - assert document.page_content == MOCK_RESPONSE_JSON["elements"][i]["text"] - assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"] - assert document.metadata["id"] == MOCK_RESPONSE_JSON["elements"][i]["id"] - assert document.metadata["bounding_box"] == json.dumps( - MOCK_RESPONSE_JSON["elements"][i]["bounding_box"] - ) - - -@patch("requests.post") -def test_page_split_text_output(mock_post: Mock) -> None: - mock_post.return_value = MagicMock( - status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON) - ) - - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type="text", - split="page", - api_key="valid_api_key", - exclude=[], - ) - documents = loader.load() - - assert len(documents) == 1 - - for i, document in enumerate(documents): - assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"] - - -@patch("requests.post") -def test_none_split_html_output(mock_post: Mock) -> None: - mock_post.return_value = MagicMock( - status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON) - ) - - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type="html", - split="none", - api_key="valid_api_key", - exclude=[], - ) - documents = loader.load() - - assert len(documents) == 1 - assert documents[0].page_content == MOCK_RESPONSE_JSON["html"] - assert documents[0].metadata["total_pages"] == 1 - - -@patch("requests.post") -def test_element_split_html_output(mock_post: Mock) -> None: - mock_post.return_value = MagicMock( - status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON) - ) - - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type="html", - split="element", - api_key="valid_api_key", - 
exclude=[], - ) - documents = loader.load() - - assert len(documents) == 2 - - for i, document in enumerate(documents): - assert document.page_content == MOCK_RESPONSE_JSON["elements"][i]["html"] - assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"] - assert document.metadata["id"] == MOCK_RESPONSE_JSON["elements"][i]["id"] - assert document.metadata["bounding_box"] == json.dumps( - MOCK_RESPONSE_JSON["elements"][i]["bounding_box"] - ) - - -@patch("requests.post") -def test_page_split_html_output(mock_post: Mock) -> None: - mock_post.return_value = MagicMock( - status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON) - ) - - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type="html", - split="page", - api_key="valid_api_key", - exclude=[], - ) - documents = loader.load() - - assert len(documents) == 1 - - for i, document in enumerate(documents): - assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"] - - -@patch("requests.post") -def test_request_exception(mock_post: Mock) -> None: - mock_post.side_effect = requests.RequestException("Mocked request exception") - - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type="html", - split="page", - api_key="valid_api_key", - exclude=[], - ) - - with TestCase.assertRaises(TestCase(), ValueError) as context: - loader.load() - - assert "Failed to send request: Mocked request exception" == str(context.exception) - - -@patch("requests.post") -def test_json_decode_error(mock_post: Mock) -> None: - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.side_effect = json.JSONDecodeError("Expecting value", "", 0) - mock_post.return_value = mock_response - - loader = UpstageLayoutAnalysisLoader( - file_path=EXAMPLE_PDF_PATH, - output_type="html", - split="page", - api_key="valid_api_key", - exclude=[], - ) - - with TestCase.assertRaises(TestCase(), ValueError) as context: - loader.load() - - assert ( - "Failed to decode JSON response: Expecting value: line 1 column 1 (char 0)" - == str(context.exception) - )