From 4e73ec5da0be1764e449ccc86e567ec701862332 Mon Sep 17 00:00:00 2001 From: Oleksandr Shyshko Date: Fri, 31 May 2019 02:05:43 -0600 Subject: [PATCH 01/41] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 42f9c74..2e906ab 100644 --- a/README.md +++ b/README.md @@ -403,6 +403,7 @@ This will also work for S3 buckets/paths and SSH hosts/ports/paths. You can override multiple URL prefixes, the rule of thumb is: the longest URL prefix that matches your URL wins. ## License +Copyright © Oleksandr Shyshko. All rights reserved. The use and distribution terms for this software are covered by the Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) From 902add9d4e98967064c446b9478987deefdf17e5 Mon Sep 17 00:00:00 2001 From: Arthur Wolf Date: Wed, 19 May 2021 22:46:47 -0400 Subject: [PATCH 02/41] aws creds --- src/uio/fs/s3.clj | 17 +++++++++-------- src/uio/impl.clj | 8 ++++---- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/uio/fs/s3.clj b/src/uio/fs/s3.clj index c367968..deea236 100644 --- a/src/uio/fs/s3.clj +++ b/src/uio/fs/s3.clj @@ -9,7 +9,7 @@ (ns uio.fs.s3 (:require [uio.impl :refer :all] [clojure.string :as str]) - (:import [com.amazonaws.auth BasicAWSCredentials STSAssumeRoleSessionCredentialsProvider AWSCredentialsProvider] + (:import [com.amazonaws.auth BasicAWSCredentials STSAssumeRoleSessionCredentialsProvider AWSCredentialsProvider DefaultAWSCredentialsProviderChain] [com.amazonaws.internal StaticCredentialsProvider] [com.amazonaws.services.s3 AmazonS3Client] [com.amazonaws.services.s3.model ListObjectsRequest ObjectListing S3ObjectSummary GetObjectRequest CannedAccessControlList AmazonS3Exception] @@ -23,13 +23,14 @@ (subs (or (path url) "_") 1)) (defn ^AWSCredentialsProvider ->creds-provider [url] - (let [{:keys [access secret role-arn] :as creds} (url->creds url) - _ (if-not access (die-creds-key-not-found :access url creds)) - _ (if-not secret 
(die-creds-key-not-found :secret url creds)) - bawsc (BasicAWSCredentials. access secret)] - (if role-arn - (STSAssumeRoleSessionCredentialsProvider. bawsc ^String role-arn "uio-s3-session") - (StaticCredentialsProvider. bawsc)))) + (let [{:keys [access secret role-arn] :as creds} (url->creds url)] + (if (and access + secret) + (let [bawsc (BasicAWSCredentials. access secret)] + (if role-arn + (STSAssumeRoleSessionCredentialsProvider. bawsc ^String role-arn "uio-s3-session") + (StaticCredentialsProvider. bawsc))) + (DefaultAWSCredentialsProviderChain/getInstance)))) (defn with-client-bucket-key [url c-b-k->x] (try-with url diff --git a/src/uio/impl.clj b/src/uio/impl.clj index 85ad7cc..672bc1c 100644 --- a/src/uio/impl.clj +++ b/src/uio/impl.clj @@ -239,12 +239,12 @@ :access (or (c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID")) :secret (or (c :s3.secret) (e "AWS_SECRET") (e "AWS_SECRET_ACCESS_KEY"))} - "s3" {:access (or (c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID") (die-creds-key-not-found :access url creds)) - :secret (or (c :s3.secret) (e "AWS_SECRET") (e "AWS_SECRET_ACCESS_KEY") (die-creds-key-not-found :secret url creds)) + "s3" {:access (or (c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID")) + :secret (or (c :s3.secret) (e "AWS_SECRET") (e "AWS_SECRET_ACCESS_KEY")) :role-arn nil} - "sftp" {:user (or (c :sftp.user) (e "SFTP_USER") (e "SSH_USER") (die-creds-key-not-found :user url creds)) - :known-hosts (or (c :sftp.known-hosts) (e "SFTP_KNOWN_HOSTS") (e "SSH_KNOWN_HOSTS") (die-creds-key-not-found :known-hosts url creds)) + "sftp" {:user (or (c :sftp.user) (e "SFTP_USER") (e "SSH_USER")) + :known-hosts (or (c :sftp.known-hosts) (e "SFTP_KNOWN_HOSTS") (e "SSH_KNOWN_HOSTS")) :pass (or (c :sftp.pass) (e "SFTP_PASS") (e "SSH_PASS")) :identity (or (c :sftp.identity) (e "SFTP_IDENTITY") (e "SSH_PRIVATE_KEY")) :identity-pass (or (c :sftp.identity.pass) From db53b3ba005e9d77ac1f8fa9a765870bbc40646e Mon Sep 17 00:00:00 2001 From: changliang1007 
Date: Tue, 10 Aug 2021 14:32:30 -0600 Subject: [PATCH 03/41] use `"hadoop.security.authentication" "simple"` --- project.clj | 18 +++++++++++++----- src/uio/fs/hdfs.clj | 8 +------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/project.clj b/project.clj index 067a4cc..b10eb6a 100644 --- a/project.clj +++ b/project.clj @@ -1,10 +1,18 @@ -(defproject uio/uio "1.2-SNAPSHOT" +(defproject uio/uio "1.2-simple-hdfs-SNAPSHOT" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." - :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs"} - - :deploy-repositories [["clojars" {:url "https://clojars.org/repo/" - :sign-releases false}]] + :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" + "foursquare" {:url "https://foursquaredev.jfrog.io/foursquaredev/fsnexus" + :username :env/MVN_USERNAME :password :env/MVN_PASSWORD}} + + :deploy-repositories {"snapshots" {:id "foursquare" + :url "https://foursquaredev.jfrog.io/foursquaredev/fsfactual-snapshots-local" + :username :env/MVN_USERNAME :password :env/MVN_PASSWORD + :sign-releases false} + "releases" {:id "foursquare" + :url "https://foursquaredev.jfrog.io/foursquaredev/fsfactual-releases-local" + :username :env/MVN_USERNAME :password :env/MVN_PASSWORD + :sign-releases false}} :dependencies [[org.clojure/clojure "1.9.0"] diff --git a/src/uio/fs/hdfs.clj b/src/uio/fs/hdfs.clj index d0a70b4..5d0c4f8 100644 --- a/src/uio/fs/hdfs.clj +++ b/src/uio/fs/hdfs.clj @@ -52,17 +52,11 @@ (if (exists? url) (.addResource c (URL. url)))) - (.set c "hadoop.security.authentication" "kerberos") + (.set c "hadoop.security.authentication" "simple") ; https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SecureMode.html (UserGroupInformation/setConfiguration c) ; only use keytab creds if either user or keytab path was specified, otherwise rely on default auth (e.g. 
if ran from kinit/Yarn) - (when (or principal keytab-path) - (UserGroupInformation/loginUserFromKeytab principal keytab-path) - - ; TODO is there a way to provide more information about the failure? - (if-not (UserGroupInformation/isLoginKeytabBased) - (die "Could not authenticate. Wrong or missing keytab?"))) c)) From b7b8ac76fc9e2eb0d45f358d77521971e063478f Mon Sep 17 00:00:00 2001 From: changliang1007 Date: Tue, 10 Aug 2021 14:34:47 -0600 Subject: [PATCH 04/41] update link --- src/uio/fs/hdfs.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uio/fs/hdfs.clj b/src/uio/fs/hdfs.clj index 5d0c4f8..6987737 100644 --- a/src/uio/fs/hdfs.clj +++ b/src/uio/fs/hdfs.clj @@ -52,7 +52,7 @@ (if (exists? url) (.addResource c (URL. url)))) - (.set c "hadoop.security.authentication" "simple") ; https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SecureMode.html + (.set c "hadoop.security.authentication" "simple") ; https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SecureMode.html#Common_Configurations (UserGroupInformation/setConfiguration c) From 803168a3f8f08cb7627028d076a4fb6dd6261d8d Mon Sep 17 00:00:00 2001 From: Oleksandr Shyshko Date: Fri, 31 May 2019 02:05:43 -0600 Subject: [PATCH 05/41] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a442262..9e1ed20 100644 --- a/README.md +++ b/README.md @@ -434,6 +434,7 @@ This will also work for S3 buckets/paths and SSH hosts/ports/paths. You can override multiple URL prefixes, the rule of thumb is: the longest URL prefix that matches your URL wins. ## License +Copyright © Oleksandr Shyshko. All rights reserved. 
The use and distribution terms for this software are covered by the Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) From f175c4910b1dcba0382f95af698995a8c4a85682 Mon Sep 17 00:00:00 2001 From: Iris Fu Date: Mon, 22 Nov 2021 12:58:01 -0800 Subject: [PATCH 06/41] rebase to develop --- project.clj | 4 ++-- src/uio/fs/s3.clj | 22 ++++++---------------- src/uio/impl.clj | 16 ++++++++++++---- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/project.clj b/project.clj index 067a4cc..e331ff4 100644 --- a/project.clj +++ b/project.clj @@ -8,8 +8,8 @@ :dependencies [[org.clojure/clojure "1.9.0"] - [com.amazonaws/aws-java-sdk-s3 "1.11.417"] ; s3 - [com.amazonaws/aws-java-sdk-sts "1.11.417"] ; s3 with roles + [com.amazonaws/aws-java-sdk-s3 "1.12.31"] ; s3 + [com.amazonaws/aws-java-sdk-sts "1.12.31"] ; s3 with roles [org.apache.httpcomponents/httpclient "4.5.6"] ; (needed by `aws-java-sdk-s3`) [com.jcraft/jsch "0.1.54"] ; sftp diff --git a/src/uio/fs/s3.clj b/src/uio/fs/s3.clj index c367968..732a937 100644 --- a/src/uio/fs/s3.clj +++ b/src/uio/fs/s3.clj @@ -9,31 +9,21 @@ (ns uio.fs.s3 (:require [uio.impl :refer :all] [clojure.string :as str]) - (:import [com.amazonaws.auth BasicAWSCredentials STSAssumeRoleSessionCredentialsProvider AWSCredentialsProvider] - [com.amazonaws.internal StaticCredentialsProvider] - [com.amazonaws.services.s3 AmazonS3Client] + (:import [com.amazonaws.services.s3 AmazonS3ClientBuilder] [com.amazonaws.services.s3.model ListObjectsRequest ObjectListing S3ObjectSummary GetObjectRequest CannedAccessControlList AmazonS3Exception] [uio.fs S3$S3OutputStream] [java.nio.file NoSuchFileException])) + (defn bucket-key->url [b k] (str "s3://" b default-delimiter (escape-path k))) (defn url->key [^String url] (subs (or (path url) "_") 1)) -(defn ^AWSCredentialsProvider ->creds-provider [url] - (let [{:keys [access secret role-arn] :as creds} (url->creds url) - _ (if-not access (die-creds-key-not-found :access url creds)) - 
_ (if-not secret (die-creds-key-not-found :secret url creds)) - bawsc (BasicAWSCredentials. access secret)] - (if role-arn - (STSAssumeRoleSessionCredentialsProvider. bawsc ^String role-arn "uio-s3-session") - (StaticCredentialsProvider. bawsc)))) - (defn with-client-bucket-key [url c-b-k->x] (try-with url - #(AmazonS3Client. (->creds-provider url)) + #(AmazonS3ClientBuilder/defaultClient) #(c-b-k->x % (host url) (url->key url)) #(.shutdown %))) @@ -51,7 +41,7 @@ (+ start (:length opts)) (dec (Long/MAX_VALUE)))] - (wrap-is #(AmazonS3Client. (->creds-provider url)) + (wrap-is #(AmazonS3ClientBuilder/defaultClient) #(.getObjectContent (.getObject % (.withRange @@ -60,7 +50,7 @@ end))) #(.shutdown %)))) -(defmethod to :s3 [url & [opts]] (wrap-os #(AmazonS3Client. (->creds-provider url)) +(defmethod to :s3 [url & [opts]] (wrap-os #(AmazonS3ClientBuilder/defaultClient) #(S3$S3OutputStream. % (host url) (url->key url) (some-> opts :acl acl->enum)) #(.shutdown %))) @@ -163,7 +153,7 @@ (defmethod ls :s3 [url & args] (single-file-or url (let [opts (get-opts default-opts-ls url args) - c (AmazonS3Client. 
(->creds-provider url)) + c (AmazonS3ClientBuilder/defaultClient) b (host url) k (url->key (ensure-ends-with-delimiter url))] (cond->> (close-when-realized-or-finalized diff --git a/src/uio/impl.clj b/src/uio/impl.clj index 85ad7cc..f89e823 100644 --- a/src/uio/impl.clj +++ b/src/uio/impl.clj @@ -255,10 +255,18 @@ ; if hdfs, replace empty strings with nil (required for proper work of HDFS API) + change path to URL (case (scheme url) - "hdfs" (-> creds - (update :principal nie) - (update :keytab #(ensure-url :keytab (nie %)))) - creds))) + ; + "hdfs" {:principal (nie (or (cr :principal) (c :hdfs.keytab.principal) (e "HDFS_KEYTAB_PRINCIPAL") (e "KEYTAB_PRINCIPAL"))) + :keytab (eu :keytab (nie (or (cr :keytab) (c :hdfs.keytab.path) (e "HDFS_KEYTAB_PATH") (e "KEYTAB_FILE")))) + :access (or (cr :access) (c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID")) + :secret (or (cr :secret) (c :s3.secret) (e "AWS_SECRET") (e "AWS_SECRET_ACCESS_KEY"))} + + "sftp" {:user (or (cr :user) (c :sftp.user) (e "SFTP_USER") (e "SSH_USER") (die-no-key :user)) + :known-hosts (or (cr :known-hosts) (c :sftp.known-hosts) (e "SFTP_KNOWN_HOSTS") (e "SSH_KNOWN_HOSTS") (die-no-key :known-hosts)) + :pass (or (cr :pass) (c :sftp.pass) (e "SFTP_PASS") (e "SSH_PASS")) + :identity (or (cr :identity) (c :sftp.identity) (e "SFTP_IDENTITY") (e "SSH_PRIVATE_KEY")) + :identity-pass (or (cr :identity-pass) (c :sftp.identity.pass) + (c :sftp.identity.passphrase) (e "SFTP_IDENTITY_PASS") (e "SSH_PASSPHRASE"))}))) (defn url->creds [url] (url->creds' *config* (into {} (System/getenv)) url)) From 767662a132a217d0818e53f6f4fbc5e96378da30 Mon Sep 17 00:00:00 2001 From: Iris Fu Date: Mon, 22 Nov 2021 13:00:49 -0800 Subject: [PATCH 07/41] correct changes to impl --- src/uio/impl.clj | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/uio/impl.clj b/src/uio/impl.clj index f89e823..1eefefb 100644 --- a/src/uio/impl.clj +++ b/src/uio/impl.clj @@ -239,10 +239,6 @@ :access (or 
(c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID")) :secret (or (c :s3.secret) (e "AWS_SECRET") (e "AWS_SECRET_ACCESS_KEY"))} - "s3" {:access (or (c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID") (die-creds-key-not-found :access url creds)) - :secret (or (c :s3.secret) (e "AWS_SECRET") (e "AWS_SECRET_ACCESS_KEY") (die-creds-key-not-found :secret url creds)) - :role-arn nil} - "sftp" {:user (or (c :sftp.user) (e "SFTP_USER") (e "SSH_USER") (die-creds-key-not-found :user url creds)) :known-hosts (or (c :sftp.known-hosts) (e "SFTP_KNOWN_HOSTS") (e "SSH_KNOWN_HOSTS") (die-creds-key-not-found :known-hosts url creds)) :pass (or (c :sftp.pass) (e "SFTP_PASS") (e "SSH_PASS")) @@ -255,19 +251,11 @@ ; if hdfs, replace empty strings with nil (required for proper work of HDFS API) + change path to URL (case (scheme url) - ; - "hdfs" {:principal (nie (or (cr :principal) (c :hdfs.keytab.principal) (e "HDFS_KEYTAB_PRINCIPAL") (e "KEYTAB_PRINCIPAL"))) - :keytab (eu :keytab (nie (or (cr :keytab) (c :hdfs.keytab.path) (e "HDFS_KEYTAB_PATH") (e "KEYTAB_FILE")))) - :access (or (cr :access) (c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID")) - :secret (or (cr :secret) (c :s3.secret) (e "AWS_SECRET") (e "AWS_SECRET_ACCESS_KEY"))} - - "sftp" {:user (or (cr :user) (c :sftp.user) (e "SFTP_USER") (e "SSH_USER") (die-no-key :user)) - :known-hosts (or (cr :known-hosts) (c :sftp.known-hosts) (e "SFTP_KNOWN_HOSTS") (e "SSH_KNOWN_HOSTS") (die-no-key :known-hosts)) - :pass (or (cr :pass) (c :sftp.pass) (e "SFTP_PASS") (e "SSH_PASS")) - :identity (or (cr :identity) (c :sftp.identity) (e "SFTP_IDENTITY") (e "SSH_PRIVATE_KEY")) - :identity-pass (or (cr :identity-pass) (c :sftp.identity.pass) - (c :sftp.identity.passphrase) (e "SFTP_IDENTITY_PASS") (e "SSH_PASSPHRASE"))}))) - + "hdfs" (-> creds + (update :principal nie) + (update :keytab #(ensure-url :keytab (nie %)))) + creds))) + (defn url->creds [url] (url->creds' *config* (into {} (System/getenv)) url)) From 
718ebc2d206170fd798519e7c15c51aa8c485df5 Mon Sep 17 00:00:00 2001 From: Iris Fu Date: Mon, 22 Nov 2021 13:01:14 -0800 Subject: [PATCH 08/41] remove space --- src/uio/impl.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uio/impl.clj b/src/uio/impl.clj index 1eefefb..3e0006e 100644 --- a/src/uio/impl.clj +++ b/src/uio/impl.clj @@ -255,7 +255,7 @@ (update :principal nie) (update :keytab #(ensure-url :keytab (nie %)))) creds))) - + (defn url->creds [url] (url->creds' *config* (into {} (System/getenv)) url)) From ca2126626b4145043ff36cf6cf00c4c6cd783677 Mon Sep 17 00:00:00 2001 From: Iris Fu Date: Mon, 22 Nov 2021 13:38:55 -0800 Subject: [PATCH 09/41] fix tests --- src/uio/impl.clj | 4 +-- test/uio/test_uio.clj | 59 ++++++------------------------------------- 2 files changed, 9 insertions(+), 54 deletions(-) diff --git a/src/uio/impl.clj b/src/uio/impl.clj index 3e0006e..8d2c464 100644 --- a/src/uio/impl.clj +++ b/src/uio/impl.clj @@ -235,9 +235,7 @@ (case (scheme url) ; "hdfs" {:principal (or (c :hdfs.keytab.principal) (e "HDFS_KEYTAB_PRINCIPAL") (e "KEYTAB_PRINCIPAL")) - :keytab (or (c :hdfs.keytab.path) (e "HDFS_KEYTAB_PATH") (e "KEYTAB_FILE")) - :access (or (c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID")) - :secret (or (c :s3.secret) (e "AWS_SECRET") (e "AWS_SECRET_ACCESS_KEY"))} + :keytab (or (c :hdfs.keytab.path) (e "HDFS_KEYTAB_PATH") (e "KEYTAB_FILE"))} "sftp" {:user (or (c :sftp.user) (e "SFTP_USER") (e "SSH_USER") (die-creds-key-not-found :user url creds)) :known-hosts (or (c :sftp.known-hosts) (e "SFTP_KNOWN_HOSTS") (e "SSH_KNOWN_HOSTS") (die-creds-key-not-found :known-hosts url creds)) diff --git a/test/uio/test_uio.clj b/test/uio/test_uio.clj index 3a2b952..19ded1d 100644 --- a/test/uio/test_uio.clj +++ b/test/uio/test_uio.clj @@ -70,7 +70,7 @@ (parent-of "file:///path") => "file:///" (parent-of "file:///") => nil (parent-of "file://") => nil - + (parent-of "/path/to/file.txt") => (throws #"Expected a scheme") (parent-of 
"path/to/file.txt") => (throws #"Expected a scheme") @@ -103,7 +103,7 @@ (replace-path "fs://user@host:123/path/to/file.txt" nil) => "fs://user@host:123" (replace-path "fs://user@host:123/path/to/file.txt?a=b" "") => "fs://user@host:123?a=b" (replace-path "fs://host/path/to/file.txt" "file.txt") => (throws #"Expected argument") - + (ensure-not-ends-with-delimiter "fs:///") => "fs:///" (ensure-not-ends-with-delimiter "fs:///test/") => "fs:///test" (ensure-not-ends-with-delimiter "fs:///test///") => "fs:///test" @@ -145,13 +145,7 @@ ; :secret "from-config"} (let [c11 {"hdfs://" {:principal "principal-c11" ; v1.1 - :keytab "file:///path/to/keytab-c11" - :access "access-c11" - :secret "secret-c11"} - - "s3://" {:access "access-c11" - :secret "secret-c11" - :role-arn "role-arn-c11"} + :keytab "file:///path/to/keytab-c11"} "sftp://" {:user "user-c11" :known-hosts "known-hosts-c11" @@ -162,9 +156,6 @@ c10 {:hdfs.keytab.principal "principal-c10" ; v1.0 :hdfs.keytab.path "/path/to/keytab-c10" - :s3.access "access-c10" - :s3.secret "secret-c10" - :sftp.user "user-c10" :sftp.known-hosts "known-hosts-c10" :sftp.pass "pass-c10" @@ -174,9 +165,6 @@ e10 {"HDFS_KEYTAB_PRINCIPAL" "principal-e10" ; v1.0 "HDFS_KEYTAB_PATH" "/path/to/keytab-e10" - "AWS_ACCESS" "access-e10" - "AWS_SECRET" "secret-e10" - "SFTP_USER" "user-e10" "SFTP_KNOWN_HOSTS" "known-hosts-e10" "SFTP_PASS" "pass-e10" @@ -186,9 +174,6 @@ e09 {"KEYTAB_PRINCIPAL" "principal-e09" ; v0.9 "KEYTAB_FILE" "/path/to/keytab-e09" - "AWS_ACCESS_KEY_ID" "access-e09" - "AWS_SECRET_ACCESS_KEY" "secret-e09" - "SSH_USER" "user-e09" "SSH_KNOWN_HOSTS" "known-hosts-e09" "SSH_PASS" "pass-e09" @@ -198,30 +183,20 @@ ; c11 works without env and beats c10, e10 and e09 (let[ cr-c11 c11] (url->creds' c11 {} "hdfs://") => (cr-c11 "hdfs://") - (url->creds' c11 {} "s3://") => (cr-c11 "s3://") (url->creds' c11 {} "sftp://") => (cr-c11 "sftp://") (url->creds' (merge c11 c10) {} "hdfs://") => (cr-c11 "hdfs://") - (url->creds' (merge c11 c10) {} 
"s3://") => (cr-c11 "s3://") (url->creds' (merge c11 c10) {} "sftp://") => (cr-c11 "sftp://") (url->creds' c11 e09 "hdfs://") => (cr-c11 "hdfs://") - (url->creds' c11 e09 "s3://") => (cr-c11 "s3://") (url->creds' c11 e09 "sftp://") => (cr-c11 "sftp://") (url->creds' c11 e10 "hdfs://") => (cr-c11 "hdfs://") - (url->creds' c11 e10 "s3://") => (cr-c11 "s3://") (url->creds' c11 e10 "sftp://") => (cr-c11 "sftp://")) ; c10 works without env and beats e10 and e09 (let[cr-c10 {"hdfs://" {:principal "principal-c10" - :keytab "file:///path/to/keytab-c10" - :access "access-c10" - :secret "secret-c10"} - - "s3://" {:access "access-c10" - :secret "secret-c10" - :role-arn nil} + :keytab "file:///path/to/keytab-c10"} "sftp://" {:user "user-c10" :known-hosts "known-hosts-c10" @@ -230,26 +205,17 @@ :identity-pass "identity-pass-c10"}}] (url->creds' c10 {} "hdfs://") => (cr-c10 "hdfs://") - (url->creds' c10 {} "s3://") => (cr-c10 "s3://") (url->creds' c10 {} "sftp://") => (cr-c10 "sftp://") (url->creds' c10 e09 "hdfs://") => (cr-c10 "hdfs://") - (url->creds' c10 e09 "s3://") => (cr-c10 "s3://") (url->creds' c10 e09 "sftp://") => (cr-c10 "sftp://") (url->creds' c10 e10 "hdfs://") => (cr-c10 "hdfs://") - (url->creds' c10 e10 "s3://") => (cr-c10 "s3://") (url->creds' c10 e10 "sftp://") => (cr-c10 "sftp://")) ; e10 works without config and beats e09 (let [cr-e10 {"hdfs://" {:principal "principal-e10" - :keytab "file:///path/to/keytab-e10" - :access "access-e10" - :secret "secret-e10"} - - "s3://" {:access "access-e10" - :secret "secret-e10" - :role-arn nil} + :keytab "file:///path/to/keytab-e10"} "sftp://" {:user "user-e10" :known-hosts "known-hosts-e10" @@ -258,22 +224,14 @@ :identity-pass "identity-pass-e10"}}] (url->creds' {} e10 "hdfs://") => (cr-e10 "hdfs://") - (url->creds' {} e10 "s3://") => (cr-e10 "s3://") (url->creds' {} e10 "sftp://") => (cr-e10 "sftp://") (url->creds' {} (merge e10 e09) "hdfs://") => (cr-e10 "hdfs://") - (url->creds' {} (merge e10 e09) "s3://") => (cr-e10 
"s3://") (url->creds' {} (merge e10 e09) "sftp://") => (cr-e10 "sftp://")) ; e09 works without config (let [cr-e09 {"hdfs://" {:principal "principal-e09" - :keytab "file:///path/to/keytab-e09" - :access "access-e09" - :secret "secret-e09"} - - "s3://" {:access "access-e09" - :secret "secret-e09" - :role-arn nil} + :keytab "file:///path/to/keytab-e09"} "sftp://" {:user "user-e09" :known-hosts "known-hosts-e09" @@ -282,12 +240,11 @@ :identity-pass "identity-pass-e09"}}] (url->creds' {} e09 "hdfs://") => (cr-e09 "hdfs://") - (url->creds' {} e09 "s3://") => (cr-e09 "s3://") (url->creds' {} e09 "sftp://") => (cr-e09 "sftp://"))) ; ensure nil is never returned - (url->creds' {} {} "hdfs:///") => {:access nil, :keytab nil, :principal nil, :secret nil} - (url->creds' {"hdfs:///" nil} {} "hdfs:///") => {:access nil, :keytab nil, :principal nil, :secret nil} + (url->creds' {} {} "hdfs:///") => {:keytab nil, :principal nil} + (url->creds' {"hdfs:///" nil} {} "hdfs:///") => {:keytab nil, :principal nil} (url->creds' {"hdfs:///" {}} {} "hdfs:///") => {:keytab nil, :principal nil}) (facts "Deducing of (de)compression codecs works, even for chained ones" From b3064cfc1f3a3e889b6a6d251be29b5b9172ae06 Mon Sep 17 00:00:00 2001 From: Iris Fu Date: Mon, 22 Nov 2021 13:39:28 -0800 Subject: [PATCH 10/41] rm extra line --- src/uio/fs/s3.clj | 1 - 1 file changed, 1 deletion(-) diff --git a/src/uio/fs/s3.clj b/src/uio/fs/s3.clj index 732a937..d2c602d 100644 --- a/src/uio/fs/s3.clj +++ b/src/uio/fs/s3.clj @@ -14,7 +14,6 @@ [uio.fs S3$S3OutputStream] [java.nio.file NoSuchFileException])) - (defn bucket-key->url [b k] (str "s3://" b default-delimiter (escape-path k))) From 8417698de3bad44ed394f792f7b6ea6c3d9e9f4e Mon Sep 17 00:00:00 2001 From: Iris Fu Date: Tue, 23 Nov 2021 10:45:29 -0800 Subject: [PATCH 11/41] change back s3 access for hdfs --- src/uio/impl.clj | 4 +++- test/uio/test_uio.clj | 29 +++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git 
a/src/uio/impl.clj b/src/uio/impl.clj index 8d2c464..3e0006e 100644 --- a/src/uio/impl.clj +++ b/src/uio/impl.clj @@ -235,7 +235,9 @@ (case (scheme url) ; "hdfs" {:principal (or (c :hdfs.keytab.principal) (e "HDFS_KEYTAB_PRINCIPAL") (e "KEYTAB_PRINCIPAL")) - :keytab (or (c :hdfs.keytab.path) (e "HDFS_KEYTAB_PATH") (e "KEYTAB_FILE"))} + :keytab (or (c :hdfs.keytab.path) (e "HDFS_KEYTAB_PATH") (e "KEYTAB_FILE")) + :access (or (c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID")) + :secret (or (c :s3.secret) (e "AWS_SECRET") (e "AWS_SECRET_ACCESS_KEY"))} "sftp" {:user (or (c :sftp.user) (e "SFTP_USER") (e "SSH_USER") (die-creds-key-not-found :user url creds)) :known-hosts (or (c :sftp.known-hosts) (e "SFTP_KNOWN_HOSTS") (e "SSH_KNOWN_HOSTS") (die-creds-key-not-found :known-hosts url creds)) diff --git a/test/uio/test_uio.clj b/test/uio/test_uio.clj index 19ded1d..f2d81ae 100644 --- a/test/uio/test_uio.clj +++ b/test/uio/test_uio.clj @@ -145,7 +145,9 @@ ; :secret "from-config"} (let [c11 {"hdfs://" {:principal "principal-c11" ; v1.1 - :keytab "file:///path/to/keytab-c11"} + :keytab "file:///path/to/keytab-c11" + :access "access-c11" + :secret "secret-c11"} "sftp://" {:user "user-c11" :known-hosts "known-hosts-c11" @@ -156,6 +158,9 @@ c10 {:hdfs.keytab.principal "principal-c10" ; v1.0 :hdfs.keytab.path "/path/to/keytab-c10" + :s3.access "access-c10" + :s3.secret "secret-c10" + :sftp.user "user-c10" :sftp.known-hosts "known-hosts-c10" :sftp.pass "pass-c10" @@ -165,6 +170,9 @@ e10 {"HDFS_KEYTAB_PRINCIPAL" "principal-e10" ; v1.0 "HDFS_KEYTAB_PATH" "/path/to/keytab-e10" + "AWS_ACCESS" "access-e10" + "AWS_SECRET" "secret-e10" + "SFTP_USER" "user-e10" "SFTP_KNOWN_HOSTS" "known-hosts-e10" "SFTP_PASS" "pass-e10" @@ -174,6 +182,9 @@ e09 {"KEYTAB_PRINCIPAL" "principal-e09" ; v0.9 "KEYTAB_FILE" "/path/to/keytab-e09" + "AWS_ACCESS_KEY_ID" "access-e09" + "AWS_SECRET_ACCESS_KEY" "secret-e09" + "SSH_USER" "user-e09" "SSH_KNOWN_HOSTS" "known-hosts-e09" "SSH_PASS" "pass-e09" @@ 
-196,7 +207,9 @@ ; c10 works without env and beats e10 and e09 (let[cr-c10 {"hdfs://" {:principal "principal-c10" - :keytab "file:///path/to/keytab-c10"} + :keytab "file:///path/to/keytab-c10" + :access "access-c10" + :secret "secret-c10"} "sftp://" {:user "user-c10" :known-hosts "known-hosts-c10" @@ -215,7 +228,9 @@ ; e10 works without config and beats e09 (let [cr-e10 {"hdfs://" {:principal "principal-e10" - :keytab "file:///path/to/keytab-e10"} + :keytab "file:///path/to/keytab-e10" + :access "access-e10" + :secret "secret-e10"} "sftp://" {:user "user-e10" :known-hosts "known-hosts-e10" @@ -231,7 +246,9 @@ ; e09 works without config (let [cr-e09 {"hdfs://" {:principal "principal-e09" - :keytab "file:///path/to/keytab-e09"} + :keytab "file:///path/to/keytab-e09" + :access "access-e09" + :secret "secret-e09"} "sftp://" {:user "user-e09" :known-hosts "known-hosts-e09" @@ -243,8 +260,8 @@ (url->creds' {} e09 "sftp://") => (cr-e09 "sftp://"))) ; ensure nil is never returned - (url->creds' {} {} "hdfs:///") => {:keytab nil, :principal nil} - (url->creds' {"hdfs:///" nil} {} "hdfs:///") => {:keytab nil, :principal nil} + (url->creds' {} {} "hdfs:///") => {:access nil :keytab nil :principal nil :secret nil} + (url->creds' {"hdfs:///" nil} {} "hdfs:///") => {:access nil, :keytab nil, :principal nil, :secret nil} (url->creds' {"hdfs:///" {}} {} "hdfs:///") => {:keytab nil, :principal nil}) (facts "Deducing of (de)compression codecs works, even for chained ones" From 6fa71fe9aa39dedc39aa8d795e0ce339c1d6af0d Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Tue, 23 Nov 2021 16:52:58 -0800 Subject: [PATCH 12/41] UIO should not control hdfs auth Allow the local configuration to speak for itself --- src/uio/fs/hdfs.clj | 6 ------ src/uio/impl.clj | 20 ++------------------ 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/src/uio/fs/hdfs.clj b/src/uio/fs/hdfs.clj index 6987737..a8c0be2 100644 --- a/src/uio/fs/hdfs.clj +++ b/src/uio/fs/hdfs.clj @@ -29,8 +29,6 
@@ (let [c (Configuration.) creds (url->creds url) - principal (:principal creds) - keytab-path (some-> (:keytab creds) path) aws-access (:access creds) aws-secret (:secret creds)] @@ -51,13 +49,9 @@ "file:///etc/hadoop/conf/hdfs-site.xml"]] (if (exists? url) (.addResource c (URL. url)))) - - (.set c "hadoop.security.authentication" "simple") ; https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SecureMode.html#Common_Configurations (UserGroupInformation/setConfiguration c) - ; only use keytab creds if either user or keytab path was specified, otherwise rely on default auth (e.g. if ran from kinit/Yarn) - c)) (defn ->fs [^String url] diff --git a/src/uio/impl.clj b/src/uio/impl.clj index df9bfec..efa03d4 100644 --- a/src/uio/impl.clj +++ b/src/uio/impl.clj @@ -222,21 +222,11 @@ c (or config {}) ; config -- for compatibility, credentials stored as keys e (or env {}) ; env -- for compatibility, comes from JVM process (immutable, extracted as arg for testing) - nie (fn [s] (if (str/blank? s) nil s)) ; nil-if-empty - - ensure-url (fn [k url-or-path] ; ensure-url - (cond (nil? url-or-path) nil - (str/starts-with? url-or-path default-delimiter) (str "file://" url-or-path) - (url? url-or-path) url-or-path - :else (die (str "Expected URL or path that starts with / for " k ", but got: " url-or-path)))) - creds (if creds ; so it's the latest "url -> creds" version creds (case (scheme url) ; - "hdfs" {:principal (or (c :hdfs.keytab.principal) (e "HDFS_KEYTAB_PRINCIPAL") (e "KEYTAB_PRINCIPAL")) - :keytab (or (c :hdfs.keytab.path) (e "HDFS_KEYTAB_PATH") (e "KEYTAB_FILE")) - :access (or (c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID")) + "hdfs" {:access (or (c :s3.access) (e "AWS_ACCESS") (e "AWS_ACCESS_KEY_ID")) :secret (or (c :s3.secret) (e "AWS_SECRET") (e "AWS_SECRET_ACCESS_KEY"))} "sftp" {:user (or (c :sftp.user) (e "SFTP_USER") (e "SSH_USER")) @@ -248,13 +238,7 @@ {}))] ; TODO post-validate pairs? ; TODO fail on unknown keys in `cr`? 
- - ; if hdfs, replace empty strings with nil (required for proper work of HDFS API) + change path to URL - (case (scheme url) - "hdfs" (-> creds - (update :principal nie) - (update :keytab #(ensure-url :keytab (nie %)))) - creds))) + creds)) (defn url->creds [url] (url->creds' *config* (into {} (System/getenv)) url)) From b24c70b36ad9af5bf030db0aeb503aa5143b188b Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Tue, 23 Nov 2021 16:53:17 -0800 Subject: [PATCH 13/41] Dont override s3 impls --- src/uio/fs/hdfs.clj | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/uio/fs/hdfs.clj b/src/uio/fs/hdfs.clj index a8c0be2..c22a664 100644 --- a/src/uio/fs/hdfs.clj +++ b/src/uio/fs/hdfs.clj @@ -33,15 +33,12 @@ aws-secret (:secret creds)] (when (and aws-access aws-secret) - (.set c "fs.s3a.impl" "org.apache.hadoop.fs.s3a.S3AFileSystem") (.set c "fs.s3a.access.key" aws-access) (.set c "fs.s3a.secret.key" aws-secret) - (.set c "fs.s3n.impl" "org.apache.hadoop.fs.s3native.NativeS3FileSystem") (.set c "fs.s3n.awsAccessKeyId" aws-access) (.set c "fs.s3n.awsSecretAccessKey" aws-secret) - (.set c "fs.s3.impl" "org.apache.hadoop.fs.s3.S3FileSystem") (.set c "fs.s3.awsAccessKeyId" aws-access) (.set c "fs.s3.awsSecretAccessKey" aws-secret)) From 13bc4cac8fefe0bbca250c4446eec8617e5ee6fd Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Mon, 29 Nov 2021 13:00:29 -0800 Subject: [PATCH 14/41] Update project.clj Co-authored-by: irisxingfu <52678253+irisxingfu@users.noreply.github.com> --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index c145ff0..747b5fc 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2-simple-hdfs-SNAPSHOT" +(defproject uio/uio "1.2-SNAPSHOT" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." 
:repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From a20d389e86f02d7f81a66164e2f2701a20106d07 Mon Sep 17 00:00:00 2001 From: Iris Fu Date: Mon, 6 Dec 2021 12:07:43 -0800 Subject: [PATCH 15/41] release 1.2 --- CHANGELOG.md | 2 +- project.clj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c32cb1c..5cb87d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## [1.2] - unreleased +## [1.2] - 2021-12-06 ### Fixed - proper escaping of ` `, `+` and `%` in `file://`, `hdfs://`, `s3://` and `sftp://` - don't lookup credentials from env (compatibility with v1.0) when there is a matching url in configuration diff --git a/project.clj b/project.clj index 747b5fc..c028c93 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2-SNAPSHOT" +(defproject uio/uio "1.2" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From c6468531e7efd95d5986bd5f7adee74b9555e4c3 Mon Sep 17 00:00:00 2001 From: Iris Fu Date: Mon, 6 Dec 2021 12:11:23 -0800 Subject: [PATCH 16/41] bump version to 1.3-SNAPSHOT --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index c028c93..331dce1 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2" +(defproject uio/uio "1.3-SNAPSHOT" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." 
:repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From d5edfb8f56363e8a9482d78c33f5d75b0d47c3d5 Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Tue, 7 Dec 2021 13:39:09 -0800 Subject: [PATCH 17/41] Allow s3 to access bucket in any region --- project.clj | 8 ++++---- src/uio/fs/s3.clj | 13 +++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/project.clj b/project.clj index 331dce1..efe6050 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.3-SNAPSHOT" +(defproject uio/uio "1.2.1-SNAPSHOT" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" @@ -16,8 +16,8 @@ :dependencies [[org.clojure/clojure "1.9.0"] - [com.amazonaws/aws-java-sdk-s3 "1.12.31"] ; s3 - [com.amazonaws/aws-java-sdk-sts "1.12.31"] ; s3 with roles + [com.amazonaws/aws-java-sdk-s3 "1.12.125"] ; s3 + [com.amazonaws/aws-java-sdk-sts "1.12.125"] ; s3 with roles [org.apache.httpcomponents/httpclient "4.5.6"] ; (needed by `aws-java-sdk-s3`) [com.jcraft/jsch "0.1.54"] ; sftp @@ -55,4 +55,4 @@ ; A trick to prevent IntelliJ from resetting compiler/module version to "1.5" :pom-plugins [[org.apache.maven.plugins/maven-compiler-plugin "3.6.1" [:configuration ([:source "1.8"] - [:target "1.8"])]]]) + [:target "1.8"])]]]) diff --git a/src/uio/fs/s3.clj b/src/uio/fs/s3.clj index d2c602d..d1fd81b 100644 --- a/src/uio/fs/s3.clj +++ b/src/uio/fs/s3.clj @@ -20,9 +20,14 @@ (defn url->key [^String url] (subs (or (path url) "_") 1)) +(defn client-for-url [^String url] + (-> (AmazonS3ClientBuilder/standard) + (.withForceGlobalBucketAccessEnabled true) + (.build))) + (defn with-client-bucket-key [url c-b-k->x] (try-with url - #(AmazonS3ClientBuilder/defaultClient) + #(client-for-url url) #(c-b-k->x % (host url) (url->key url)) #(.shutdown %))) @@ -40,7 +45,7 @@ (+ start
(:length opts)) (dec (Long/MAX_VALUE)))] - (wrap-is #(AmazonS3ClientBuilder/defaultClient) + (wrap-is #(client-for-url url) #(.getObjectContent (.getObject % (.withRange @@ -49,7 +54,7 @@ end))) #(.shutdown %)))) -(defmethod to :s3 [url & [opts]] (wrap-os #(AmazonS3ClientBuilder/defaultClient) +(defmethod to :s3 [url & [opts]] (wrap-os #(client-for-url url) #(S3$S3OutputStream. % (host url) (url->key url) (some-> opts :acl acl->enum)) #(.shutdown %))) @@ -152,7 +157,7 @@ (defmethod ls :s3 [url & args] (single-file-or url (let [opts (get-opts default-opts-ls url args) - c (AmazonS3ClientBuilder/defaultClient) + c (client-for-url url) b (host url) k (url->key (ensure-ends-with-delimiter url))] (cond->> (close-when-realized-or-finalized From 7101f35bd3562b81d64184efe9dcecfb394c58bf Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Fri, 17 Dec 2021 09:46:13 -0800 Subject: [PATCH 18/41] Check for configured cred overrides in s3 --- CHANGELOG.md | 4 ++++ src/uio/fs/s3.clj | 13 +++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cb87d1..3734010 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Unreleased +### Changed +- S3 fs checks for configured access and secret overrides again. 
+ ## [1.2] - 2021-12-06 ### Fixed - proper escaping of ` `, `+` and `%` in `file://`, `hdfs://`, `s3://` and `sftp://` diff --git a/src/uio/fs/s3.clj b/src/uio/fs/s3.clj index d1fd81b..4ea8677 100644 --- a/src/uio/fs/s3.clj +++ b/src/uio/fs/s3.clj @@ -12,7 +12,8 @@ (:import [com.amazonaws.services.s3 AmazonS3ClientBuilder] [com.amazonaws.services.s3.model ListObjectsRequest ObjectListing S3ObjectSummary GetObjectRequest CannedAccessControlList AmazonS3Exception] [uio.fs S3$S3OutputStream] - [java.nio.file NoSuchFileException])) + [java.nio.file NoSuchFileException] + (com.amazonaws.auth BasicAWSCredentials AWSStaticCredentialsProvider))) (defn bucket-key->url [b k] (str "s3://" b default-delimiter (escape-path k))) @@ -21,9 +22,13 @@ (subs (or (path url) "_") 1)) (defn client-for-url [^String url] - (-> (AmazonS3ClientBuilder/standard) - (.withForceGlobalBucketAccessEnabled true) - (.build))) + (let [client-builder (AmazonS3ClientBuilder/standard) + {:keys [access secret]} (url->creds url)] + (when (and access secret) + (.withCredentials client-builder (AWSStaticCredentialsProvider. (BasicAWSCredentials. access secret)))) + (.withForceGlobalBucketAccessEnabled client-builder true) + (.build client-builder))) + (defn with-client-bucket-key [url c-b-k->x] (try-with url From 7512ee44f40091afed694b671fe2e39359eff62f Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Fri, 17 Dec 2021 17:10:50 -0800 Subject: [PATCH 19/41] UIO V1.2.1 --- CHANGELOG.md | 3 +++ project.clj | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3734010..58badec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,10 @@ # Changelog ## Unreleased + +## [1.2.1] - 2021-12-17 ### Changed +- Force ability to use bucket in any region. - S3 fs checks for configured access and secret overrides again. 
## [1.2] - 2021-12-06 diff --git a/project.clj b/project.clj index efe6050..a226e1b 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2.1-SNAPSHOT" +(defproject uio/uio "1.2.1" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From 86b63b332a29e0abbc8abb2cdd4db8fb5df843c5 Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Mon, 29 Aug 2022 13:27:41 -0500 Subject: [PATCH 20/41] Snapshot --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index a226e1b..e1e3f91 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2.1" +(defproject uio/uio "1.2.2-SNAPSHOT" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From aea61ce793dd7a966a3517cae16cf56b352d8b32 Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Thu, 15 Sep 2022 14:51:57 -0500 Subject: [PATCH 21/41] Make connection timeout configurable --- CHANGELOG.md | 2 ++ src/uio/fs/sftp.clj | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58badec..949cfbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased +### Added +- SFTP fs now allows configuration of the connection timeout: `(uio.fs.sftp/with-sftp-configs {:connection-timeout 10000} #(uio/to* to-url))` ## [1.2.1] - 2021-12-17 ### Changed diff --git a/src/uio/fs/sftp.clj b/src/uio/fs/sftp.clj index ef027f0..de2425e 100644 --- a/src/uio/fs/sftp.clj +++ b/src/uio/fs/sftp.clj @@ -23,10 +23,20 @@ (:import [com.jcraft.jsch JSch ChannelSftp Session SftpException SftpATTRS Channel] [java.io ByteArrayInputStream] [java.util.zip 
GZIPOutputStream GZIPInputStream] - [java.util Date])) + [java.util Date] + (clojure.lang IPersistentMap))) (def default-timeout-ms 10000) +(def ^:dynamic *sft-connection-config* {:connection-timeout default-timeout-ms}) + +(defn with-sftp-configs [config f] + (if-not (instance? IPersistentMap config) + (die (str "Argument `config` expected to be a map, but was " (.getName (class config))))) + + (binding [*sft-connection-config* (merge *sft-connection-config* config)] + (f))) + ; JSch expects a private key with new-line characters as described in RFC-4716. ; However, it's useful to pass private keys around as a single-line string where new-lines are replaced with space. ; This fn will convert a single-line private key back to multi-line format and make JSch happy. @@ -71,7 +81,8 @@ (.getBytes (or identity-pass "")))) s (.getSession j user (host url) (or (port url) 22)) ; ^Session - _ (.setTimeout s default-timeout-ms) + _ (.setTimeout s (:connection-timeout *sft-connection-config*)) + _ (println (str "Connection timeout is " (:connection-timeout *sft-connection-config*))) _ (.setConfig s "StrictHostKeyChecking" (if known-hosts "yes" "now")) _ (.setPassword s pass) _ (.connect s) @@ -140,7 +151,7 @@ (defmethod copy :sftp [from-url to-url & args] (try-with to-url #(->session+channel to-url) (fn [[_ c]] - (with-open [is (from from-url)] + (with-open [is (from from-url args)] (.put c is (path to-url)))) (fn [[s c]] (.disconnect c) From f2c8ac09e4c106f3b9ebcb9dc8e1cfee77cbd65c Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Thu, 15 Sep 2022 17:18:38 -0500 Subject: [PATCH 22/41] Remove log statement --- src/uio/fs/sftp.clj | 1 - 1 file changed, 1 deletion(-) diff --git a/src/uio/fs/sftp.clj b/src/uio/fs/sftp.clj index de2425e..d06de38 100644 --- a/src/uio/fs/sftp.clj +++ b/src/uio/fs/sftp.clj @@ -82,7 +82,6 @@ s (.getSession j user (host url) (or (port url) 22)) ; ^Session _ (.setTimeout s (:connection-timeout *sft-connection-config*)) - _ (println (str 
"Connection timeout is " (:connection-timeout *sft-connection-config*))) _ (.setConfig s "StrictHostKeyChecking" (if known-hosts "yes" "now")) _ (.setPassword s pass) _ (.connect s) From 1b72eb08660c205caa2587282e7d3ea1c4753677 Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Fri, 16 Sep 2022 14:50:05 -0500 Subject: [PATCH 23/41] Apply suggestions from code review Co-authored-by: Arthur Wolf --- src/uio/fs/sftp.clj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uio/fs/sftp.clj b/src/uio/fs/sftp.clj index d06de38..e184708 100644 --- a/src/uio/fs/sftp.clj +++ b/src/uio/fs/sftp.clj @@ -28,13 +28,13 @@ (def default-timeout-ms 10000) -(def ^:dynamic *sft-connection-config* {:connection-timeout default-timeout-ms}) +(def ^:dynamic *sftp-connection-config* {:connection-timeout default-timeout-ms}) (defn with-sftp-configs [config f] (if-not (instance? IPersistentMap config) (die (str "Argument `config` expected to be a map, but was " (.getName (class config))))) - (binding [*sft-connection-config* (merge *sft-connection-config* config)] + (binding [*sftp-connection-config* (merge *sftp-connection-config* config)] (f))) ; JSch expects a private key with new-line characters as described in RFC-4716. 
@@ -81,7 +81,7 @@ (.getBytes (or identity-pass "")))) s (.getSession j user (host url) (or (port url) 22)) ; ^Session - _ (.setTimeout s (:connection-timeout *sft-connection-config*)) + _ (.setTimeout s (:connection-timeout *sftp-connection-config*)) _ (.setConfig s "StrictHostKeyChecking" (if known-hosts "yes" "now")) _ (.setPassword s pass) _ (.connect s) From 6ad7d3c130bd1694b6df2f013f0dce83e72a2903 Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Fri, 16 Sep 2022 16:58:02 -0500 Subject: [PATCH 24/41] Upgrade aws sdk --- CHANGELOG.md | 2 ++ project.clj | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 949cfbf..39768ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ ## Unreleased ### Added - SFTP fs now allows configuration of the connection timeout: `(uio.fs.sftp/with-sftp-configs {:connection-timeout 10000} #(uio/to* to-url))` +### Changed +- Upgrade AWS SDK to 1.12.300 ## [1.2.1] - 2021-12-17 ### Changed diff --git a/project.clj b/project.clj index e1e3f91..b0423f0 100644 --- a/project.clj +++ b/project.clj @@ -16,9 +16,9 @@ :dependencies [[org.clojure/clojure "1.9.0"] - [com.amazonaws/aws-java-sdk-s3 "1.12.125"] ; s3 - [com.amazonaws/aws-java-sdk-sts "1.12.125"] ; s3 with roles - [org.apache.httpcomponents/httpclient "4.5.6"] ; (needed by `aws-java-sdk-s3`) + [com.amazonaws/aws-java-sdk-s3 "1.12.300"] ; s3 + [com.amazonaws/aws-java-sdk-sts "1.12.300"] ; s3 with roles + [org.apache.httpcomponents/httpclient "4.5.13"] ; (needed by `aws-java-sdk-s3`) [com.jcraft/jsch "0.1.54"] ; sftp [com.jcraft/jzlib "1.1.3"] ; (needed by `jsch`) From 65b990d882ad6911afbd0f0bcba0d7c509ba2b5a Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Fri, 16 Sep 2022 16:58:50 -0500 Subject: [PATCH 25/41] Upgrade jsch --- CHANGELOG.md | 1 + project.clj | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39768ae..c85c676 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -5,6 +5,7 @@ - SFTP fs now allows configuration of the connection timeout: `(uio.fs.sftp/with-sftp-configs {:connection-timeout 10000} #(uio/to* to-url))` ### Changed - Upgrade AWS SDK to 1.12.300 +- Upgrade jsch to 0.1.55 ## [1.2.1] - 2021-12-17 ### Changed diff --git a/project.clj b/project.clj index b0423f0..276b72a 100644 --- a/project.clj +++ b/project.clj @@ -20,7 +20,7 @@ [com.amazonaws/aws-java-sdk-sts "1.12.300"] ; s3 with roles [org.apache.httpcomponents/httpclient "4.5.13"] ; (needed by `aws-java-sdk-s3`) - [com.jcraft/jsch "0.1.54"] ; sftp + [com.jcraft/jsch "0.1.55"] ; sftp [com.jcraft/jzlib "1.1.3"] ; (needed by `jsch`) [org.apache.hadoop/hadoop-common "2.8.1" ; hdfs (API) note: 3.1.1 is available, but it can't find HDFS impl From 8ba0de0f53a85b31494f95c4e49d5a4d37963e17 Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Fri, 16 Sep 2022 17:00:57 -0500 Subject: [PATCH 26/41] uio 1.2.2 --- CHANGELOG.md | 2 ++ project.clj | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c85c676..887826b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased + +## [1.2.2] - 2022-9-16 ### Added - SFTP fs now allows configuration of the connection timeout: `(uio.fs.sftp/with-sftp-configs {:connection-timeout 10000} #(uio/to* to-url))` ### Changed diff --git a/project.clj b/project.clj index 276b72a..5ea7ef2 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2.2-SNAPSHOT" +(defproject uio/uio "1.2.2" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." 
:repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From 534575030a3c8f1da77862d157907a4737fa4d3b Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Fri, 16 Sep 2022 17:02:05 -0500 Subject: [PATCH 27/41] snapshot --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index 5ea7ef2..b89c428 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2.2" +(defproject uio/uio "1.2.3-SNAPSHOT" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From 5b6cc59156c5dba0330f35746d93b30919111f53 Mon Sep 17 00:00:00 2001 From: Changliang Cao Date: Thu, 5 Jan 2023 11:12:38 -0700 Subject: [PATCH 28/41] rm `Etags don't match` --- src/uio/fs/S3.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/uio/fs/S3.java b/src/uio/fs/S3.java index f7a59c7..f60a6e6 100644 --- a/src/uio/fs/S3.java +++ b/src/uio/fs/S3.java @@ -86,11 +86,6 @@ private void _flush(boolean isLastPart) throws IOException { PartETag remotePartEtag = c.uploadPart(upr).getPartETag(); tags.add(remotePartEtag); - if (!remotePartEtag.getETag().equals(localPartEtag)) { - throw new RuntimeException("Part ETags don't match:\n" + - " - local : " + localPartEtag + "\n" + - " - remote: " + remotePartEtag.getETag()); - } partIndex++; } catch (Exception e) { @@ -120,10 +115,6 @@ public void close() throws IOException { } String localEtag = hex(localPartDigest.digest()) + "-" + partIndex; - if (!localEtag.equals(remoteEtag)) - throw new RuntimeException("Etags don't match:\n" + - " - local : " + localEtag + "\n" + - " - remote: " + remoteEtag); } catch (Exception e) { abort(); // TODO delete remote file if exception happened after `c.completeMultipartUpload(...)` throw e; From 32b0acce198b4e994f27400e92b3bef62384020c Mon Sep 17 00:00:00 2001 
From: Changliang Cao Date: Thu, 5 Jan 2023 11:26:08 -0700 Subject: [PATCH 29/41] rm useless assignment --- src/uio/fs/S3.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/uio/fs/S3.java b/src/uio/fs/S3.java index 152ca4b..403afab 100644 --- a/src/uio/fs/S3.java +++ b/src/uio/fs/S3.java @@ -81,7 +81,6 @@ private void assertOpen() throws IOException { private void _flush(boolean isLastPart) throws IOException { partOutputStream.close(); - String localPartEtag = hex(partDigest.digest()); try { UploadPartRequest upr = new UploadPartRequest() .withBucketName(init.getBucketName()) @@ -119,15 +118,12 @@ public void close() throws IOException { " - read : " + read + "\n" + " - written: " + written); - String remoteEtag = c.completeMultipartUpload( - new CompleteMultipartUploadRequest(init.getBucketName(), init.getKey(), init.getUploadId(), tags) - ).getETag(); + c.completeMultipartUpload(new CompleteMultipartUploadRequest(init.getBucketName(), init.getKey(), init.getUploadId(), tags)); partDigest.reset(); for (PartETag tag : tags) { partDigest.update(unhex(tag.getETag())); } - String localEtag = hex(partDigest.digest()) + "-" + partIndex; } catch (Exception e) { abort(); // TODO delete remote file if exception happened after `c.completeMultipartUpload(...)` From e21091583359034595e94901b9bd3eb21e155aea Mon Sep 17 00:00:00 2001 From: Changliang Cao Date: Thu, 5 Jan 2023 11:35:17 -0700 Subject: [PATCH 30/41] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 887826b..c4ef7fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased +### Fixed +- S3OutputStream no longer validates a Multipart Upload's ETag against its MD5 digest. 
[DEL-2151](https://foursquare.atlassian.net/browse/DEL-2151) ## [1.2.2] - 2022-9-16 ### Added From 6a398fa7d58d84a90dfa01b4ec453e3e846510e4 Mon Sep 17 00:00:00 2001 From: Changliang Cao Date: Thu, 5 Jan 2023 11:38:37 -0700 Subject: [PATCH 31/41] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4ef7fd..6b4eefd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## Unreleased ### Fixed -- S3OutputStream no longer validates a Multipart Upload's ETag against its MD5 digest. [DEL-2151](https://foursquare.atlassian.net/browse/DEL-2151) +- S3OutputStream no longer validates a Multipart Upload's ETag against its MD5 digest, according to [this](https://stackoverflow.com/a/53886736). [DEL-2151](https://foursquare.atlassian.net/browse/DEL-2151) ## [1.2.2] - 2022-9-16 ### Added From dc2baf68218a01819a001b14c0baec980a252c45 Mon Sep 17 00:00:00 2001 From: Changliang Cao Date: Mon, 9 Jan 2023 09:46:53 -0700 Subject: [PATCH 32/41] rm `partDigest` --- src/uio/fs/S3.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/uio/fs/S3.java b/src/uio/fs/S3.java index 403afab..d9cd91d 100644 --- a/src/uio/fs/S3.java +++ b/src/uio/fs/S3.java @@ -29,7 +29,6 @@ public static class S3OutputStream extends OutputStream { private final MessageDigest inDigest = MessageDigest.getInstance("MD5"); private final MessageDigest outDigest = MessageDigest.getInstance("MD5"); - private final MessageDigest partDigest = MessageDigest.getInstance("MD5"); private final File partTempFile; private Streams.StatsableOutputStream partOutputStream; @@ -65,7 +64,6 @@ public void write(byte[] bs, int offset, int length) throws IOException { // append to buffer partOutputStream.write(bs, offset, bytesToCopy); - partDigest.update(bs, offset, bytesToCopy); outDigest.update(bs, offset, bytesToCopy); offset += bytesToCopy; @@ -95,7 +93,6 @@ private void _flush(boolean isLastPart) throws IOException { 
tags.add(remotePartEtag); - partDigest.reset(); partOutputStream = new Streams.StatsableOutputStream(new FileOutputStream(partTempFile)); partIndex++; } catch (Exception e) { @@ -120,11 +117,6 @@ public void close() throws IOException { c.completeMultipartUpload(new CompleteMultipartUploadRequest(init.getBucketName(), init.getKey(), init.getUploadId(), tags)); - partDigest.reset(); - for (PartETag tag : tags) { - partDigest.update(unhex(tag.getETag())); - } - } catch (Exception e) { abort(); // TODO delete remote file if exception happened after `c.completeMultipartUpload(...)` throw e; From b43b451a50d3234e8e7ec7970a59e786a49fb369 Mon Sep 17 00:00:00 2001 From: Changliang Cao Date: Mon, 9 Jan 2023 10:14:46 -0700 Subject: [PATCH 33/41] release 1.2.3 --- CHANGELOG.md | 2 ++ project.clj | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b4eefd..74b48f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased + +## [1.2.3] - 2023-01-09 ### Fixed - S3OutputStream no longer validates a Multipart Upload's ETag against its MD5 digest, according to [this](https://stackoverflow.com/a/53886736). [DEL-2151](https://foursquare.atlassian.net/browse/DEL-2151) diff --git a/project.clj b/project.clj index b89c428..789e888 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2.3-SNAPSHOT" +(defproject uio/uio "1.2.3" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." 
:repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From 01a8dc7e0e4e11d868ffa446d44e8ea4276e2c2a Mon Sep 17 00:00:00 2001 From: Changliang Cao Date: Mon, 9 Jan 2023 10:16:23 -0700 Subject: [PATCH 34/41] bump to 1.2.4-SNAPSHOT --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index 789e888..939c00a 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2.3" +(defproject uio/uio "1.2.4-SNAPSHOT" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From 6add7fc010af7f592e03721647eb31f9f91aabf2 Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Mon, 23 Jan 2023 15:34:29 -0600 Subject: [PATCH 35/41] Support listing in res fs --- CHANGELOG.md | 2 ++ README.md | 2 +- project.clj | 1 + resources/test/test.txt | 1 + src/uio/fs/res.clj | 18 ++++++++++++++++-- test/uio/fs/test_res.clj | 25 +++++++++++++++++++++++++ 6 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 resources/test/test.txt create mode 100644 test/uio/fs/test_res.clj diff --git a/CHANGELOG.md b/CHANGELOG.md index 74b48f9..baa6d1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased +### Added +- Support for listing in resource filesystem. 
## [1.2.3] - 2023-01-09 ### Fixed diff --git a/README.md b/README.md index 9e1ed20..9829c3d 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Features: |[hdfs](#hdfs) | • | • | • | • | • | • | • | • |`hdfs://[host]/path/to/file.txt` | |[http(s)](#https) | • | |:cat:| :cat: | | | | |`http[s]://host[:port]/path/to/file.txt`| |[mem](#mem) | • | • | • | • | • | • | • | • |`mem:///path/to/file.txt` | -|[res](#res) | • | | | • | | | | |`res:///com/mypackage/file.txt` | +|[res](#res) | • | | | • | | | • | |`res:///com/mypackage/file.txt` | |[s3](#s3) | • | • | • | • | • |:dog:| • | • |`s3://bucket/key/with/slashes.txt` | |[sftp](#sftp) | • |:bug:| • | • |:pig: | • | • | • |`sftp://host[:port]/path/to/file.txt` | diff --git a/project.clj b/project.clj index 939c00a..70cafd4 100644 --- a/project.clj +++ b/project.clj @@ -40,6 +40,7 @@ "-target" "1.8" "-Xlint:deprecation" "-Xlint:unchecked"] + :resource-paths ["resources"] :profiles {:dev {:dependencies [[midje "1.9.2"]] :plugins [[lein-midje "3.2.1"]]}} diff --git a/resources/test/test.txt b/resources/test/test.txt new file mode 100644 index 0000000..3f1837b --- /dev/null +++ b/resources/test/test.txt @@ -0,0 +1 @@ +gdbg diff --git a/src/uio/fs/res.clj b/src/uio/fs/res.clj index 78f4951..f0ab1c8 100644 --- a/src/uio/fs/res.clj +++ b/src/uio/fs/res.clj @@ -4,8 +4,10 @@ ; ^^^ triple slash ; (ns uio.fs.res - (:require [uio.impl :refer :all]) - (:import (clojure.java.api Clojure))) + (:require + [uio.impl :refer :all]) + (:import (clojure.java.api Clojure) + (java.io File))) (defn assert-res-url [url] (if (host url) @@ -20,3 +22,15 @@ (defmethod exists? :res [url & args] (if (.getResource Clojure (path (assert-res-url url))) true false)) + +(defmethod ls :res [url & args] + (->> + (.substring (path (normalize url)) 1) ; get path and remove leading slash + (.getResources (.getClassLoader Clojure)) ; Multiple resources can have the same name + (enumeration-seq) + (map #(File. 
(.getPath %))) + (map #(if (.isFile %) + % + (seq (.listFiles %)))) + (flatten) ; If it's a directory, flatten the list of files. + (map #(do {:url (str "file://" %)})))) ; expected format is a list of maps diff --git a/test/uio/fs/test_res.clj b/test/uio/fs/test_res.clj new file mode 100644 index 0000000..b11f070 --- /dev/null +++ b/test/uio/fs/test_res.clj @@ -0,0 +1,25 @@ +(ns uio.fs.test-res + (:require [midje.sweet :refer :all] + [uio.fs.mem :refer :all] + [uio.impl :refer :all])) + +(facts "from" + (slurp (from "res:///test/test.txt")) => "gdbg\n") + +(facts "exists" + (exists? "res:///") => true + (exists? "res:///test") => true + (exists? "res:///test/") => true + (exists? "res:///test/te") => false + (exists? "res:///test/test.txt") => true) + +(facts "Listing" + (fact "listing root succeeds" + (ls "res:///") =not=> []) + (fact "listing test dir gives correct results" + (count (ls "res:///test/")) => 1 + (:url (first (ls "res:///test"))) => (has-suffix "test/test.txt") + (:url (first (ls "res:///test/"))) => (has-suffix "test/test.txt")) + (fact "listing test file returns just the same file" + (count (ls "res:///test/test.txt")) => 1 + (:url (first (ls "res:///test/test.txt"))) => (has-suffix "test/test.txt"))) From 620fe3fa4bf274f46ea947214d1947837be4831e Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Wed, 25 Jan 2023 12:19:22 -0600 Subject: [PATCH 36/41] Release V1.2.4 --- CHANGELOG.md | 2 ++ project.clj | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index baa6d1e..cfbb73a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased + +## [1.2.4] - 2023-01-25 ### Added - Support for listing in resource filesystem. 
diff --git a/project.clj b/project.clj index 70cafd4..409ac13 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2.4-SNAPSHOT" +(defproject uio/uio "1.2.4" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From c799bdc255fe77d314145fdcfe7dd8cf64630993 Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Wed, 25 Jan 2023 12:19:50 -0600 Subject: [PATCH 37/41] Snapshot --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index 409ac13..7c04377 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2.4" +(defproject uio/uio "1.2.5-SNAPSHOT" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From 0bbe9f83433e9588b6211aff810d1dabbc282d4e Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Mon, 30 Jan 2023 13:44:35 -0600 Subject: [PATCH 38/41] Remove keytab+principle from hdfs tests --- src/uio/fs/hdfs.clj | 2 +- test/uio/test_uio.clj | 37 ++++++++++--------------------------- 2 files changed, 11 insertions(+), 28 deletions(-) diff --git a/src/uio/fs/hdfs.clj b/src/uio/fs/hdfs.clj index c22a664..60601bd 100644 --- a/src/uio/fs/hdfs.clj +++ b/src/uio/fs/hdfs.clj @@ -8,7 +8,7 @@ ; :access (optional) S3 access ; :secret (optional) S3 secret ; -; NOTE: to use `kinit` isntead of keytab file, pass empty creds (`{}` or all nil values) +; NOTE: to use `kinit` instead of keytab file, pass empty creds (`{}` or all nil values) ; (ns uio.fs.hdfs (:require [clojure.string :as str] diff --git a/test/uio/test_uio.clj b/test/uio/test_uio.clj index f2d81ae..fcca1ac 100644 --- a/test/uio/test_uio.clj +++ b/test/uio/test_uio.clj @@ -144,9 +144,7 @@ ; :access 
"from-config" ; :secret "from-config"} - (let [c11 {"hdfs://" {:principal "principal-c11" ; v1.1 - :keytab "file:///path/to/keytab-c11" - :access "access-c11" + (let [c11 {"hdfs://" {:access "access-c11" :secret "secret-c11"} "sftp://" {:user "user-c11" @@ -155,10 +153,7 @@ :identity "identity-c11" :identity-pass "identity-pass-c11"}} - c10 {:hdfs.keytab.principal "principal-c10" ; v1.0 - :hdfs.keytab.path "/path/to/keytab-c10" - - :s3.access "access-c10" + c10 {:s3.access "access-c10" :s3.secret "secret-c10" :sftp.user "user-c10" @@ -167,10 +162,7 @@ :sftp.identity "identity-c10" :sftp.identity.pass "identity-pass-c10"} - e10 {"HDFS_KEYTAB_PRINCIPAL" "principal-e10" ; v1.0 - "HDFS_KEYTAB_PATH" "/path/to/keytab-e10" - - "AWS_ACCESS" "access-e10" + e10 {"AWS_ACCESS" "access-e10" "AWS_SECRET" "secret-e10" "SFTP_USER" "user-e10" @@ -179,10 +171,7 @@ "SFTP_IDENTITY" "identity-e10" "SFTP_IDENTITY_PASS" "identity-pass-e10"} - e09 {"KEYTAB_PRINCIPAL" "principal-e09" ; v0.9 - "KEYTAB_FILE" "/path/to/keytab-e09" - - "AWS_ACCESS_KEY_ID" "access-e09" + e09 {"AWS_ACCESS_KEY_ID" "access-e09" "AWS_SECRET_ACCESS_KEY" "secret-e09" "SSH_USER" "user-e09" @@ -206,9 +195,7 @@ (url->creds' c11 e10 "sftp://") => (cr-c11 "sftp://")) ; c10 works without env and beats e10 and e09 - (let[cr-c10 {"hdfs://" {:principal "principal-c10" - :keytab "file:///path/to/keytab-c10" - :access "access-c10" + (let[cr-c10 {"hdfs://" {:access "access-c10" :secret "secret-c10"} "sftp://" {:user "user-c10" @@ -227,9 +214,7 @@ (url->creds' c10 e10 "sftp://") => (cr-c10 "sftp://")) ; e10 works without config and beats e09 - (let [cr-e10 {"hdfs://" {:principal "principal-e10" - :keytab "file:///path/to/keytab-e10" - :access "access-e10" + (let [cr-e10 {"hdfs://" {:access "access-e10" :secret "secret-e10"} "sftp://" {:user "user-e10" @@ -245,9 +230,7 @@ (url->creds' {} (merge e10 e09) "sftp://") => (cr-e10 "sftp://")) ; e09 works without config - (let [cr-e09 {"hdfs://" {:principal "principal-e09" - :keytab 
"file:///path/to/keytab-e09" - :access "access-e09" + (let [cr-e09 {"hdfs://" {:access "access-e09" :secret "secret-e09"} "sftp://" {:user "user-e09" @@ -260,9 +243,9 @@ (url->creds' {} e09 "sftp://") => (cr-e09 "sftp://"))) ; ensure nil is never returned - (url->creds' {} {} "hdfs:///") => {:access nil :keytab nil :principal nil :secret nil} - (url->creds' {"hdfs:///" nil} {} "hdfs:///") => {:access nil, :keytab nil, :principal nil, :secret nil} - (url->creds' {"hdfs:///" {}} {} "hdfs:///") => {:keytab nil, :principal nil}) + (url->creds' {} {} "hdfs:///") => {:access nil :secret nil} + (url->creds' {"hdfs:///" nil} {} "hdfs:///") => {:access nil :secret nil} + (url->creds' {"hdfs:///" {}} {} "hdfs:///") => {}) (facts "Deducing of (de)compression codecs works, even for chained ones" (map first (url->seq-of-ext+s->s ext->is->is "hdfs:///far-away/and/well-archived.xz.bz2.gz")) => [:gz :bz2 :xz] From eba657dbb72c05334505a4505729f7bac8f9fd2a Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Mon, 30 Jan 2023 13:45:04 -0600 Subject: [PATCH 39/41] Add size to res ls --- CHANGELOG.md | 2 ++ src/uio/fs/res.clj | 2 +- test/uio/fs/test_res.clj | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfbb73a..6a817e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased +### Added +- Size field when listing in resource filesystem. ## [1.2.4] - 2023-01-25 ### Added diff --git a/src/uio/fs/res.clj b/src/uio/fs/res.clj index f0ab1c8..73a7542 100644 --- a/src/uio/fs/res.clj +++ b/src/uio/fs/res.clj @@ -33,4 +33,4 @@ % (seq (.listFiles %)))) (flatten) ; If it's a directory, flatten the list of files. 
- (map #(do {:url (str "file://" %)})))) ; expected format is a list of maps + (map #(do {:url (str "file://" %) :size (.length %)})))) ; expected format is a list of maps diff --git a/test/uio/fs/test_res.clj b/test/uio/fs/test_res.clj index b11f070..18e80f1 100644 --- a/test/uio/fs/test_res.clj +++ b/test/uio/fs/test_res.clj @@ -1,6 +1,6 @@ (ns uio.fs.test-res (:require [midje.sweet :refer :all] - [uio.fs.mem :refer :all] + [uio.fs.res :refer :all] [uio.impl :refer :all])) (facts "from" @@ -19,7 +19,8 @@ (fact "listing test dir gives correct results" (count (ls "res:///test/")) => 1 (:url (first (ls "res:///test"))) => (has-suffix "test/test.txt") - (:url (first (ls "res:///test/"))) => (has-suffix "test/test.txt")) + (:url (first (ls "res:///test/"))) => (has-suffix "test/test.txt") + (:size (first (ls "res:///test/"))) => 5) (fact "listing test file returns just the same file" (count (ls "res:///test/test.txt")) => 1 (:url (first (ls "res:///test/test.txt"))) => (has-suffix "test/test.txt"))) From e59ac060601a9a3fb98077d173533202ca27e7a8 Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Mon, 30 Jan 2023 15:15:35 -0600 Subject: [PATCH 40/41] Release V1.2.5 --- CHANGELOG.md | 2 ++ project.clj | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a817e8..6aa2288 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased + +## [1.2.5] - 2023-01-30 ### Added - Size field when listing in resource filesystem. diff --git a/project.clj b/project.clj index 7c04377..cf8bccb 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2.5-SNAPSHOT" +(defproject uio/uio "1.2.5" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." 
:repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs" From cfcdbab85c24053af1916828e69c8dc008ef4c58 Mon Sep 17 00:00:00 2001 From: Francesco Macagno Date: Mon, 30 Jan 2023 15:15:58 -0600 Subject: [PATCH 41/41] Snapshot --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index cf8bccb..273aecf 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject uio/uio "1.2.5" +(defproject uio/uio "1.2.6-SNAPSHOT" :description "uio is a Clojure library and a command line tool for accessing HDFS, S3, SFTP and other file systems." :repositories {"cloudera" "https://repository.cloudera.com/content/groups/cdh-releases-rcs"