Skip to content

Commit

Permalink
Merge pull request #14 from StefanHaunsberger/pull-request/2fab99ca
Browse files Browse the repository at this point in the history
cypherQuery: Return DataFrames.DataFrame from MATCH query
  • Loading branch information
StefanHaunsberger authored Jul 19, 2018
2 parents cd2d031 + 280a68d commit 3255739
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 2 deletions.
8 changes: 7 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
language: julia
julia:
- 0.6
- 0.6.0
- nightly
notifications:
email: false
## (tests will run but not make your overall status red)
matrix:
allow_failures:
- julia: nightly

before_install:
- wget dist.neo4j.org/neo4j-community-3.3.5-unix.tar.gz
- tar -xzf neo4j-community-3.3.5-unix.tar.gz
Expand Down
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,12 @@ Rollbacks are also supported:
```julia
rollback(tx)
```

If the goal is simply to run a MATCH query and get the result as a
`DataFrames.DataFrame` object, the `cypherQuery` function can be used.
`cypherQuery` performs the query in a single transaction, which is
opened and closed automatically:

```julia
results = cypherQuery(Connection("localhost"), "MATCH (n) RETURN n.property AS Property")
```
2 changes: 2 additions & 0 deletions REQUIRE
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ julia 0.6
Compat 0.7.13
JSON
Requests
DataFrames
Missings
7 changes: 6 additions & 1 deletion src/Neo4j.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export getgraph, version, createnode, getnode, deletenode, setnodeproperty, getn
getnodeproperties, updatenodeproperties, deletenodeproperties, deletenodeproperty,
addnodelabel, addnodelabels, updatenodelabels, deletenodelabel, getnodelabels,
getnodesforlabel, getlabels, getrel, getrels, getneighbors, createrel, deleterel, getrelproperty,
getrelproperties, updaterelproperties
getrelproperties, updaterelproperties, cypherQuery
export Connection, Result

const DEFAULT_HOST = "localhost"
Expand Down Expand Up @@ -393,4 +393,9 @@ function updaterelproperties(rel::Relationship, props::JSONObject)
request(rel.properties, Requests.put, 204, connheaders(rel.graph.connection); json=props)
end

# ------------
# Cypher query
# ------------
include("cypherQuery.jl")

end # module
108 changes: 108 additions & 0 deletions src/cypherQuery.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@

using DataFrames, Missings;

"""
    cypherQuery(conn, cypher, params...; elTypes, nRowsElTypeCheck)

Run a single Cypher statement against the `transaction/commit` endpoint of `conn` and
return its tabular result as a `DataFrames.DataFrame`.

The statement and its parameters are posted in one request. When the statement yields no
rows, an empty `DataFrames.DataFrame()` is returned.

### Arguments
- `conn::Neo4j.Connection` : a valid connection to a Neo4j graph DB instance.
- `cypher::AbstractString` : Cypher `MATCH` query returning tabular data.
- `params::Pair...` : parameters which are passed on to the cypher query.
- `elTypes::Vector{DataType}` : column types can be provided manually; when empty (the
  default) they are inferred from the returned rows.
- `nRowsElTypeCheck::Int` : number of rows which are used to determine column datatypes
  (defaults to 1000).

### Throws
- `ErrorException` if the HTTP status is not 200, or if the response lists any errors
  (every `key: value` entry of every reported error is included, one per line).

### Examples
```julia-repl
julia> cypherQuery(
           Neo4j.Connection("localhost"),
           "MATCH (p :Person {name: {name}}) RETURN p.name AS Name, p.age AS Age;",
           "name" => "John Doe")
```
"""
function cypherQuery(
        conn::Connection,
        cypher::AbstractString,
        params::Pair...;
        elTypes::Vector{DataType} = Vector{DataType}(),
        nRowsElTypeCheck::Int = 1000)

    url = connurl(conn, "transaction/commit")
    headers = connheaders(conn)
    body = Dict("statements" => [Statement(cypher, Dict(params))])

    resp = Requests.post(url; headers=headers, json=body)

    if resp.status != 200
        # Report the offending statement; there is no transaction object in this scope.
        error("Failed to commit transaction ($(resp.status)): $(cypher)\n$(resp)")
    end
    respdata = Requests.json(resp)

    errors = respdata["errors"]
    if !isempty(errors)
        # Flatten all reported errors into "key: value" lines.
        # `string` keeps this safe should a value not be a String.
        lines = String[]
        for err in errors, (k, v) in err
            push!(lines, string(k, ": ", v))
        end
        error(join(lines, "\n"))
    end

    # Parse results into the data sink; no result set or no rows gives an empty table.
    results = respdata["results"]
    if isempty(results) || isempty(results[1]["data"])
        return DataFrames.DataFrame()
    end
    return parseResults(results[1], elTypes = elTypes, nRowsElTypeCheck = nRowsElTypeCheck)
end

# Currently only supports DataFrames.DataFrame objects
# -> Future: Allow different data sink types, such as tables from JuliaDB
"""
    parseResults(res; elTypes, nRowsElTypeCheck)

Convert one entry of a Cypher response's `"results"` array into a `DataFrames.DataFrame`.

### Arguments
- `res::Dict{String, Any}` : result entry holding `"columns"` (column names) and `"data"`
  (a vector of records, each with a `"row"` vector of values).
- `elTypes::Vector{DataType}` : column element types; when empty (the default) they are
  inferred with `getElTypes`.
- `nRowsElTypeCheck::Int` : number of rows handed to `getElTypes` for the inference
  (defaults to 100).

### Throws
- `ArgumentError` if the number of column types does not match the number of columns.

Values that are `nothing` are not written, so such cells keep whatever the freshly
constructed column holds (presumably `missing` for columns whose element type allows
it -- confirm against the DataFrames version in use).
"""
function parseResults(res::Dict{String, Any}; elTypes::Vector{DataType} = Vector{DataType}(), nRowsElTypeCheck::Int = 100)
    colNames = collect(Symbol, res["columns"])
    rows = res["data"]
    nRows = length(rows)

    if !isempty(elTypes)
        colTypes = elTypes
    elseif nRows == 0
        # Nothing to infer from: fall back to untyped, zero-length columns.
        colTypes = DataType[Any for i in 1:length(colNames)]
    else
        # Get elementary types from the returned rows.
        colTypes = getElTypes(rows, nRowsElTypeCheck)
    end

    if length(colTypes) != length(colNames)
        throw(ArgumentError(
            "expected $(length(colNames)) column types, got $(length(colTypes))"))
    end

    x = DataFrames.DataFrame(colTypes, colNames, nRows)

    for (rowIdx, rowVal) in enumerate(rows)
        for (colIdx, colVal) in enumerate(rowVal["row"])
            # Identity comparison: `nothing` marks a null cell, which is left untouched.
            if colVal !== nothing
                x[rowIdx, colIdx] = colVal
            end
        end
    end

    return x
end

"""
    getElTypes(x, nRowsElTypeCheck = 0)

Infer one element type per column from the records in `x`.

### Arguments
- `x::Vector{Any}` : records, each providing a `"row"` vector of column values.
- `nRowsElTypeCheck::Int` : number of leading rows to inspect. A value that is not
  positive, or that exceeds the number of records, means "inspect every row".

### Returns
A `Vector{Type}` with one entry per column of the first record:
- the type of the first non-`nothing` value found in the inspected rows; a value of type
  `Array{Any,1}` maps to `Vector{T}`, with `T` the type of its first element (or
  `Vector{Any}` if only empty arrays were seen);
- widened to `Union{T, Missings.Missing}` when a `nothing` was seen in that column, or
  when not all rows were inspected (uninspected rows may still contain `nothing`);
- `Union{Void, Missings.Missing}` when no value was found at all.

An empty `x` yields an empty vector.
"""
function getElTypes(x::Vector{Any}, nRowsElTypeCheck::Int = 0)
    nRecords = length(x)
    if nRecords == 0
        return Type[]
    end
    nCols = length(x[1]["row"])
    nMaxRows = (0 < nRowsElTypeCheck <= nRecords) ? nRowsElTypeCheck : nRecords

    elTypes = Type[Union{Void, Missings.Missing} for i in 1:nCols]
    seenValue = falses(nCols)   # a non-null value was found in the column
    settled = falses(nCols)     # the column's value type is final
    # Rows outside the inspected window cannot be vouched for, so assume they hold nulls.
    hasNull = nMaxRows < nRecords ? trues(nCols) : falses(nCols)

    for i in 1:nMaxRows
        row = x[i]["row"]
        # check each column individually
        for el in 1:nCols
            val = row[el]
            if val === nothing
                hasNull[el] = true
            elseif !settled[el]
                if typeof(val) === Array{Any,1}
                    if isempty(val)
                        # No element to look at: provisional type, keep looking for a
                        # non-empty array further down.
                        elTypes[el] = Vector{Any}
                    else
                        elTypes[el] = Vector{typeof(val[1])}
                        settled[el] = true
                    end
                else
                    elTypes[el] = typeof(val)
                    settled[el] = true
                end
                seenValue[el] = true
            end
        end
    end

    # Columns holding both values and nulls must be able to store `missing`.
    for el in 1:nCols
        if seenValue[el] && hasNull[el]
            elTypes[el] = Union{elTypes[el], Missings.Missing}
        end
    end

    return elTypes
end
22 changes: 22 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -250,5 +250,27 @@ rollresult = rolltx("MATCH (n:Neo4jjl) WHERE n.name = 'John Doe' RETURN n"; subm
@test length(rollresult.results[1]["data"]) == 0
@test length(rollresult.errors) == 0

println("Success!");


# --- New cypherQuery using transaction/commit endpoint ---
# Creates one node, reads it back through cypherQuery() as a DataFrame, then cleans up.

print("[TEST] MATCH node and return DataFrame using cypherQuery()...")

# Open transaction and create node
# NOTE(review): `conn` and this `createnode(tx, name, age; submit)` method are not defined
# in the visible part of the file -- presumably set up earlier; confirm.
loadtx = transaction(conn)
createnode(loadtx, "John Doe", 20; submit=true)
Neo4j.commit(loadtx)

# Query the node by name; the RETURN aliases become the DataFrame column names.
matchresult = cypherQuery(conn,
"MATCH (n:Neo4jjl {name: {name}}) RETURN n.name AS Name, n.age AS Age;",
"name" => "John Doe")
# Expect exactly one row holding the values created above.
@test DataFrames.DataFrame(Name = "John Doe", Age = 20) == matchresult

# Cleanup
# NOTE(review): `query` is not defined in the visible part of the file -- presumably a
# statement using the `age` parameter, defined earlier; confirm.
deletetx = transaction(conn)
deletetx(query, "age" => 20)
deleteresult = commit(deletetx)

println("Success!");
println("--- All tests passed!");

0 comments on commit 3255739

Please sign in to comment.