Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Format default-python template #2110

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
"source": [
{{- if (eq .include_python "yes") }}
"import sys\n",
"sys.path.append('../src')\n",
"\n",
"sys.path.append(\"../src\")\n",
"from {{.project_name}} import main\n",
"\n",
"main.get_taxis(spark).show(10)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,32 @@ This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the {{.project_name}} project.
"""

from setuptools import setup, find_packages

import sys
sys.path.append('./src')

sys.path.append("./src")

import datetime
import {{.project_name}}

local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")

setup(
name="{{.project_name}}",
# We use a timestamp as the local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers)
# to ensure that changes to the wheel package are picked up when used on all-purpose clusters.
version={{.project_name}}.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"),
version={{.project_name}}.__version__ + "+" + local_version,
url="https://databricks.com",
author="{{user_name}}",
description="wheel file based on {{.project_name}}/src",
packages=find_packages(where='./src'),
package_dir={'': 'src'},
packages=find_packages(where="./src"),
package_dir={"": "src"},
entry_points={
"packages": [
"main={{.project_name}}.main:main"
]
"main={{.project_name}}.main:main",
],
},
install_requires=[
# Dependencies in case the output wheel file is used as a library dependency.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"# Import DLT and src/{{.project_name}}\n",
"import dlt\n",
"import sys\n",
"\n",
"sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n",
"from pyspark.sql.functions import expr\n",
"from {{.project_name}} import main"
Expand Down Expand Up @@ -63,17 +64,18 @@
{{- if (eq .include_python "yes") }}
"@dlt.view\n",
"def taxi_raw():\n",
" return main.get_taxis(spark)\n",
" return main.get_taxis(spark)\n",
{{else}}
"\n",
"@dlt.view\n",
"def taxi_raw():\n",
" return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n",
" return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n",
{{end -}}
"\n",
"\n",
"@dlt.table\n",
"def filtered_taxis():\n",
" return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
" return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
]
}
],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
from pyspark.sql import SparkSession, DataFrame


def get_taxis(spark: SparkSession) -> DataFrame:
return spark.read.table("samples.nyctaxi.trips")
return spark.read.table("samples.nyctaxi.trips")


# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
try:
from databricks.connect import DatabricksSession
return DatabricksSession.builder.getOrCreate()
except ImportError:
return SparkSession.builder.getOrCreate()
try:
from databricks.connect import DatabricksSession

return DatabricksSession.builder.getOrCreate()
except ImportError:
return SparkSession.builder.getOrCreate()


def main():
get_taxis(get_spark()).show(5)
get_taxis(get_spark()).show(5)


if __name__ == '__main__':
main()
if __name__ == "__main__":
main()
Loading