[SAAS-511] Update Model Names and Remove Deprecated Models (#40)

* rename solar models
* remove deprecated solar docvision model
* remove deprecated layout analysis model
* change version
langchain-ai · Jan 8, 2025 · 42ac672 · 42ac672
1 parent 5a21bec
commit 42ac672
Show file tree

Hide file tree

Showing 13 changed files with 17 additions and 1,059 deletions.
diff --git a/libs/upstage/README.md b/libs/upstage/README.md
@@ -24,17 +24,17 @@ See a [usage example](https://python.langchain.com/docs/integrations/text_embedd
 Use `solar-embedding-1-large` model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name.
 `UpstageEmbeddings` will automatically add the suffixes based on the method called.
 
-## Layout Analysis Loader
+## Document Parse Loader
 
 See a [usage example](https://python.langchain.com/v0.1/docs/integrations/document_loaders/upstage/)
 
-The `use_ocr` option determines whether OCR will be used for text extraction from documents. If this option is not specified, the default policy of the [Upstage Layout Analysis API](https://developers.upstage.ai/docs/apis/layout-analysis#request-body) service will be applied. When `use_ocr` is set to `True`, OCR is utilized to extract text. In the case of PDF documents, this involves converting the PDF into images before performing OCR. Conversely, if `use_ocr` is set to `False` for PDF documents, the text information embedded within the PDF is used directly. However, if the input document is not a PDF, such as an image, setting `use_ocr` to `False` will result in an error.
+The `use_ocr` option determines whether OCR will be used for text extraction from documents. If this option is not specified, the default policy of the [Upstage Document Parse API](https://console.upstage.ai/docs/capabilities/document-parse#request) service will be applied. When `use_ocr` is set to `True`, OCR is utilized to extract text. In the case of PDF documents, this involves converting the PDF into images before performing OCR. Conversely, if `use_ocr` is set to `False` for PDF documents, the text information embedded within the PDF is used directly. However, if the input document is not a PDF, such as an image, setting `use_ocr` to `False` will result in an error.
 
 ```python
-from langchain_upstage import UpstageLayoutAnalysisLoader
+from langchain_upstage import UpstageDocumentParseLoader
 
 file_path = "/PATH/TO/YOUR/FILE.image"
-layzer = UpstageLayoutAnalysisLoader(file_path, split="page")
+layzer = UpstageDocumentParseLoader(file_path, split="page")
 
 # For improved memory efficiency, consider using the lazy_load method to load documents page by page.
 docs = layzer.load()  # or layzer.lazy_load()

diff --git a/libs/upstage/langchain_upstage/__init__.py b/libs/upstage/langchain_upstage/__init__.py
@@ -2,8 +2,6 @@
 from langchain_upstage.document_parse import UpstageDocumentParseLoader
 from langchain_upstage.document_parse_parsers import UpstageDocumentParseParser
 from langchain_upstage.embeddings import UpstageEmbeddings
-from langchain_upstage.layout_analysis import UpstageLayoutAnalysisLoader
-from langchain_upstage.layout_analysis_parsers import UpstageLayoutAnalysisParser
 from langchain_upstage.tools.groundedness_check import (
     GroundednessCheck,
     UpstageGroundednessCheck,
@@ -12,10 +10,8 @@
 __all__ = [
     "ChatUpstage",
     "UpstageEmbeddings",
-    "UpstageLayoutAnalysisLoader",
     "UpstageDocumentParseLoader",
     "UpstageDocumentParseParser",
-    "UpstageLayoutAnalysisParser",
     "UpstageGroundednessCheck",
     "GroundednessCheck",
 ]
diff --git a/libs/upstage/langchain_upstage/chat_models.py b/libs/upstage/langchain_upstage/chat_models.py
@@ -56,8 +56,7 @@
 DOC_PARSING_MODEL = ["solar-pro"]
 SOLAR_TOKENIZERS = {
     "solar-pro": "upstage/solar-pro-tokenizer",
-    "solar-1-mini-chat": "upstage/solar-1-mini-tokenizer",
-    "solar-docvision": "upstage/solar-docvision-preview-tokenizer",
+    "solar-mini": "upstage/solar-1-mini-tokenizer",
 }
 
 
@@ -105,7 +104,7 @@ def _get_ls_params(
         params["ls_provider"] = "upstage"
         return params
 
-    model_name: str = Field(default="solar-1-mini-chat", alias="model")
+    model_name: str = Field(default="solar-mini", alias="model")
     """Model name to use."""
     upstage_api_key: SecretStr = Field(
         default_factory=secret_from_env(
@@ -384,7 +383,7 @@ class AnswerWithJustification(BaseModel):
                     justification: str
 
 
-                llm = ChatUpstage(model="solar-1-mini-chat", temperature=0)
+                llm = ChatUpstage(model="solar-mini", temperature=0)
                 structured_llm = llm.with_structured_output(AnswerWithJustification)
 
                 structured_llm.invoke(
@@ -410,7 +409,7 @@ class AnswerWithJustification(BaseModel):
                     justification: str
 
 
-                llm = ChatUpstage(model="solar-1-mini-chat", temperature=0)
+                llm = ChatUpstage(model="solar-mini", temperature=0)
                 structured_llm = llm.with_structured_output(
                     AnswerWithJustification, include_raw=True
                 )
@@ -440,7 +439,7 @@ class AnswerWithJustification(BaseModel):
 
 
                 dict_schema = convert_to_openai_tool(AnswerWithJustification)
-                llm = ChatUpstage(model="solar-1-mini-chat", temperature=0)
+                llm = ChatUpstage(model="solar-mini", temperature=0)
                 structured_llm = llm.with_structured_output(dict_schema)
 
                 structured_llm.invoke(

diff --git a/libs/upstage/langchain_upstage/layout_analysis.py b/libs/upstage/langchain_upstage/layout_analysis.py