Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Oct 1, 2024
1 parent af6c548 commit abf6f47
Showing 1 changed file with 28 additions and 36 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from enum import Enum, StrEnum
from pydantic import BaseModel, constr

import numpy as np
import pandas as pd

from pydantic import BaseModel, constr

LIST_PFAM_KD = [
"Protein kinase domain",
Expand Down Expand Up @@ -70,9 +70,9 @@ class Family(Enum):
Null = None


KinaseDomainName = StrEnum("KinaseDomainName",
{"KD" + str(idx + 1): kd \
for idx, kd in enumerate(LIST_PFAM_KD)})
KinaseDomainName = StrEnum(
"KinaseDomainName", {"KD" + str(idx + 1): kd for idx, kd in enumerate(LIST_PFAM_KD)}
)


class KinHub(BaseModel):
Expand All @@ -93,6 +93,7 @@ class UniProt(BaseModel):

class KLIFS(BaseModel):
"""Pydantic model for KLIFS information."""

gene_name: str
name: str
full_name: str
Expand All @@ -105,6 +106,7 @@ class KLIFS(BaseModel):

class Pfam(BaseModel):
"""Pydantic model for Pfam information."""

domain_name: KinaseDomainName
start: int
end: int
Expand All @@ -126,6 +128,7 @@ class KinaseInfo(BaseModel):

class CollectionKinaseInfo(BaseModel):
"""Pydantic model for kinase information."""

kinase_dict: dict[str, KinaseInfo]


Expand Down Expand Up @@ -156,12 +159,12 @@ def concatenate_source_dataframe(
# columns to include in the final dataframe
if col_pfam_include is None:
col_pfam_include = [
"name",
"name",
"start",
"end",
"protein_length",
"pfam_accession",
"in_alphafold"
"in_alphafold",
]

# list of Pfam domains to include
Expand All @@ -171,13 +174,12 @@ def concatenate_source_dataframe(
# set indices to merge columns
kinhub_df_merge = kinhub_df.set_index(col_kinhub_merge)
uniprot_df_merge = uniprot_df.set_index(col_uniprot_merge)
klifs_df_merge = klifs_df.set_index(col_klifs_merge)
klifs_df_merge = klifs_df.set_index(col_klifs_merge)
pfam_df_merge = pfam_df.set_index(col_pfam_merge)

# filter Pfam dataframe for KD domains and columns to include
df_pfam_kd = pfam_df_merge.loc[
pfam_df_merge["name"].isin(LIST_PFAM_KD),
col_pfam_include
pfam_df_merge["name"].isin(LIST_PFAM_KD), col_pfam_include
]

# rename the "name" column in Pfam so it doesn't conflict with the KLIFS "name" column
Expand All @@ -188,17 +190,12 @@ def concatenate_source_dataframe(

# concat dataframes
df_merge = pd.concat(
[
kinhub_df_merge,
uniprot_df_merge,
klifs_df_merge,
df_pfam_kd
],
[kinhub_df_merge, uniprot_df_merge, klifs_df_merge, df_pfam_kd],
join="outer",
axis=1
).reset_index()
axis=1,
).reset_index()

return df_merge
return df_merge


def is_not_valid_string(str_input: str) -> bool:
Expand All @@ -208,12 +205,9 @@ def is_not_valid_string(str_input: str) -> bool:
return False


def convert_to_group(
str_input: str,
bool_list: bool = True
) -> list[Group]:
def convert_to_group(str_input: str, bool_list: bool = True) -> list[Group]:
"""Convert KinHub group to Group enum.
Parameters
----------
str_input : str
Expand All @@ -234,7 +228,7 @@ def convert_to_group(

def convert_str2family(str_input: str) -> Family:
"""Convert string to Family enum.
Parameters
----------
str_input : str
Expand All @@ -259,7 +253,7 @@ def convert_to_family(
bool_list: bool = True,
) -> Family:
"""Convert KinHub family to Family enum.
Parameters
----------
str_input : str
Expand All @@ -284,20 +278,18 @@ def create_kinase_models_from_df(
# dictionary of kinase models, keyed by HGNC name
dict_kinase_models = {}

for _, row in df.iterrows():
# create KinHub model
for _, row in df.iterrows():
# create KinHub model
kinhub = KinHub(
kinase_name=row["Kinase Name"],
manning_name=row["Manning Name"].split(", "),
xname=row["xName"].split(", "),
group=convert_to_group(row["Group"]),
family=convert_to_family(row["Family"])
family=convert_to_family(row["Family"]),
)

# create UniProt model
uniprot = UniProt(
canonical_seq=row["canonical_sequence"]
)
uniprot = UniProt(canonical_seq=row["canonical_sequence"])

# create KLIFS model
if is_not_valid_string(row["family"]):
Expand All @@ -315,7 +307,7 @@ def create_kinase_models_from_df(
family=convert_to_family(row["family"], bool_list=False),
iuphar=row["iuphar"],
kinase_id=row["kinase_ID"],
pocket_seq=pocket
pocket_seq=pocket,
)

# create Pfam model
Expand All @@ -328,7 +320,7 @@ def create_kinase_models_from_df(
end=row["end"],
protein_length=row["protein_length"],
pfam_accession=row["pfam_accession"],
in_alphafold=row["in_alphafold"]
in_alphafold=row["in_alphafold"],
)

# create KinaseInfo model
Expand All @@ -338,9 +330,9 @@ def create_kinase_models_from_df(
KinHub=kinhub,
UniProt=uniprot,
KLIFS=klifs,
Pfam=pfam
Pfam=pfam,
)

dict_kinase_models[row["HGNC Name"]] = kinase_info

return dict_kinase_models
return dict_kinase_models

0 comments on commit abf6f47

Please sign in to comment.