Skip to content

Commit

Permalink
Merge pull request #98 from spectral-cockpit/13-unittest
Browse files Browse the repository at this point in the history
- add initial tests; simplify header parsing
  • Loading branch information
philipp-baumann authored Nov 11, 2023
2 parents 009ed2b + e14581a commit 3941d3c
Show file tree
Hide file tree
Showing 23 changed files with 213 additions and 139 deletions.
1 change: 1 addition & 0 deletions R/calc_parameter_chunk_size.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ calc_parameter_chunk_size.data <- function(ds) {
#' calculate the full chunk size
#' @param ds dataset
calc_chunk_size <- function(ds) {

chunk_size <- ds$next_offset - ds$offset
return(chunk_size)
}
1 change: 1 addition & 0 deletions R/extract_metadata.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
get_basic_metadata <- function(ds_list) {
timestamp <- get_meta_timestamp(ds_list)


basic_metadata <- data.frame(
# opus_filename,
# opus_path,
Expand Down
39 changes: 22 additions & 17 deletions R/parse_chunk.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
#'
#' @param ds chunk dataset
#'
#' @param con connection to raw vector
#' @param raw raw vector
#'
#' @keywords internal
#' @family parsing
parse_chunk <- function(ds, con) UseMethod("parse_chunk")
parse_chunk <- function(ds,raw) UseMethod("parse_chunk")



Expand All @@ -15,7 +15,8 @@ parse_chunk <- function(ds, con) UseMethod("parse_chunk")
#' @inheritParams parse_chunk
#'
#' @keywords internal
parse_chunk.default <- function(ds, con) {
parse_chunk.default <- function(ds,raw) {

return(ds)
}

Expand All @@ -25,8 +26,9 @@ parse_chunk.default <- function(ds, con) {
#' @inheritParams parse_chunk
#'
#' @keywords internal
parse_chunk.text <- function(ds, con) {
text <- read_character(con, ds$offset, n = ds$chunk_size)
parse_chunk.text <- function(ds,raw) {

text <- read_character(raw, ds$offset+1, n = ds$chunk_size, n_char = ds$chunk_size)

ds$text <- text
return(ds)
Expand All @@ -37,11 +39,12 @@ parse_chunk.text <- function(ds, con) {
#' @inheritParams parse_chunk
#'
#' @keywords internal
parse_chunk.parameter <- function(ds, con) {
if (ds$text_type %in% c(112, 104, 144)) {
cursor <- ds$offset + 12
parse_chunk.parameter <- function(ds,raw) {

if (ds$text_type %in% c(104, 112, 144)) {
cursor <- ds$offset + 13
} else {
cursor <- ds$offset
cursor <- ds$offset + 1
}

chunk_size <- ds$chunk_size
Expand All @@ -51,7 +54,8 @@ parse_chunk.parameter <- function(ds, con) {
result_list <- list()

repeat {
parameter_name <- read_character(con, cursor, n = 1L)

parameter_name <- read_character(raw, cursor, n = 1, n_char = 3)

if (parameter_name == "END") {
break
Expand All @@ -61,21 +65,21 @@ parse_chunk.parameter <- function(ds, con) {

# need to add since index that is returned starts with 0;
# R index starts at 1
type_index <- read_unsigned_int(con, cursor + 4, n = 1L) + 1
type_index <- read_unsigned_int(raw, cursor + 4, n = 1L) + 1

parameter_type <- parameter_types[type_index]

parameter_size <- read_unsigned_int(con, cursor + 6, n = 1L)
parameter_size <- read_unsigned_int(raw, cursor + 6, n = 1L)

cursor_value <- cursor + 8


if (type_index == 1) {
parameter_value <- read_signed_int(con, cursor_value, n = 1L)
parameter_value <- read_signed_int(raw, cursor_value, n = 1L)
} else if (type_index == 2) {
parameter_value <- read_double(con, cursor_value, n = 1L)
parameter_value <- read_double(raw, cursor_value, n = 1L)
} else if (type_index %in% c(3, 4, 5)) {
parameter_value <- read_character(con, cursor_value, n = 1L)
parameter_value <- read_character(raw, cursor_value, n = 1L, n_char = parameter_size)
}

repeat_list <- list(
Expand Down Expand Up @@ -108,8 +112,9 @@ parse_chunk.parameter <- function(ds, con) {
#' @inheritParams parse_chunk
#'
#' @keywords internal
parse_chunk.data <- function(ds, con) {
data <- read_float(con, ds$offset, n = ds$chunk_size)
parse_chunk.data <- function(ds, raw) {

data <- read_float(raw, ds$offset+1, n = ds$chunk_size)

ds$data <- data
return(ds)
Expand Down
94 changes: 41 additions & 53 deletions R/parse_header.R
Original file line number Diff line number Diff line change
@@ -1,67 +1,25 @@
#' parse the header of the opus file
#'
#' @param raw_size raw vector of the opus binary file
#'
#' @param con connection to the raw vector
#' @param raw raw vector of the opus binary file
#' @family parsing
#' @keywords internal
parse_header <- function(raw_size, con) {
parse_header <- function(raw) {
# header length in bytes
header_length <- 504L

# set first start cursor in bytes;
# following github.com/qedsoftware/brukeropusreader
cursor <- 24L
# number of bytes of block metainfo
meta_block_size <- 12L

# file size in bytes
# file_size <- length(raw)

result_list <- list()

repeat {
if (cursor + meta_block_size >= header_length) {
break
}

block_type <- read_unsigned_int(con, cursor)
channel_type <- read_unsigned_int(con, cursor + 1L)
text_type <- read_unsigned_int(con, cursor + 2L)
# we can discuss the name here
additional_type <- read_unsigned_int(con, cursor + 3L)
chunk_size <- read_signed_int(con, cursor + 4L)
offset <- read_signed_int(con, cursor + 8L)

if (offset <= 0L) {
break
}

next_offset <- offset + 4L * chunk_size

repeat_list <- list(
block_type = block_type,
channel_type = channel_type,
text_type = text_type,
additional_type = additional_type,
offset = offset,
next_offset = next_offset,
chunk_size = chunk_size
)

result_list <- c(result_list, list(repeat_list))

if (next_offset >= raw_size) {
break
}

cursor <- cursor + 12L
}
start_cursor <- 25L

all_cursors <- seq(start_cursor, header_length, 12L)

# exclude the header chunk, since it is read in this function
result_list <- result_list[-1L]
out <- lapply(all_cursors, function(x) test_header_parse(raw, x))

return(result_list)
out[sapply(out, is.null)] <- NULL

out <- out[-1]

return(out)
}


Expand All @@ -71,3 +29,33 @@ parse_header <- function(raw_size, con) {
dec_to_ascii <- function(n) {
rawToChar(as.raw(n))
}



test_header_parse <- function(raw, cursor) {
offset <- read_signed_int(raw, cursor + 8L)

if (offset <= 0L) {
return(NULL)
}
chunk_size <- read_signed_int(raw, cursor + 4L)
next_offset <- offset + 4L * chunk_size

# if(next_offset > length(raw)){
# browser()
# chunk_size <- length(raw) - offset
# }


repeat_list <- list(
block_type = read_unsigned_int(raw, cursor),
channel_type = read_unsigned_int(raw, cursor + 1L),
text_type = read_unsigned_int(raw, cursor + 2L),
additional_type = read_unsigned_int(raw, cursor + 3L),
offset = offset,
next_offset = next_offset,
chunk_size = chunk_size
)

return(repeat_list)
}
10 changes: 4 additions & 6 deletions R/parse_opus.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,9 @@
#' @family core
#' @export
parse_opus <- function(raw, data_only) {
con <- rawConnection(raw)

raw_size <- length(raw)

header_data <- parse_header(raw_size, con)
header_data <- parse_header(raw)

dataset_list <- lapply(header_data, create_dataset)

Expand Down Expand Up @@ -152,7 +150,7 @@ parse_opus <- function(raw, data_only) {
dataset_list <- lapply(dataset_list, calc_parameter_chunk_size)
}

dataset_list <- lapply(dataset_list, function(x) parse_chunk(x, con))
dataset_list <- lapply(dataset_list, function(x) parse_chunk(x, raw))

data_types <- get_data_types(dataset_list) # nolint

Expand All @@ -167,9 +165,9 @@ parse_opus <- function(raw, data_only) {

dataset_list <- sort_list_by(dataset_list)

on.exit(close(con))

class(dataset_list) <- c("opusreader2", class(dataset_list))

raw <- NULL

return(dataset_list)
}
61 changes: 44 additions & 17 deletions R/read_bin_types.R
Original file line number Diff line number Diff line change
@@ -1,80 +1,107 @@
#' read unsigned integer from binary
#'
#' @param con connection to raw vector
#' @param raw raw vector
#' @param cursor offset
#' @param n number of elements
read_unsigned_int <- function(con, cursor, n = 1L) {
seek_opus(con, cursor)
read_unsigned_int <- function(raw, cursor, n = 1L) {
n_int <- n * 1

# seek_opus(con, cursor)
out <- readBin(
con,
raw[cursor:(cursor + n_int)],
what = "integer",
n = n,
size = 1L,
endian = "little",
signed = FALSE
)

raw <- NULL

return(out)
}

#' read signed integer from binary
#'
#' @inheritParams read_unsigned_int
read_signed_int <- function(con, cursor, n = 1L) {
seek_opus(con, cursor)
read_signed_int <- function(raw, cursor, n = 1L) {
n_signed_int <- n * 4

# seek_opus(con, cursor)
out <- readBin(
con,
raw[cursor:(cursor + n_signed_int)],
what = "integer",
n = n,
size = 4L,
endian = "little"
)

raw <- NULL

return(out)
}

#' read character from binary
#'
#' @inheritParams read_unsigned_int
#' @param n_char integer with number of desired characters to read from raw
#' @param encoding encoding to assign character strings that are read. Default
#' is `"latin1"`., which will use Windows Latin 1 (ANSI) encoding. This is
#' how Bruker software OPUS is assumed to commonly store strings.
read_character <- function(con, cursor, n = 1L, encoding = "latin1") {
seek_opus(con, cursor)
read_character <- function(raw, cursor, n = 1L, n_char, encoding = "latin1") {
# seek_opus(con, cursor)


out <- readBin(
con,
raw[cursor:(cursor + n_char)],
what = "character",
n = n,
size = 1,
endian = "little"
)

raw <- NULL

Encoding(out) <- encoding

return(out)
}

#' read float from binary (single-precision, 32 bits)
#'
#' @inheritParams read_unsigned_int
read_float <- function(con, cursor, n = 1L) {
seek_opus(con, cursor)
read_float <- function(raw, cursor, n = 1L) {
n_float <- n * 4

# seek_opus(con, cursor)
out <- readBin(
con,
raw[cursor:(cursor + n_float)],
what = "double",
n = n,
size = 4L,
endian = "little"
)

raw <- NULL

return(out)
}

#' read double from binary (double-precision, 64 bits)
#'
#' @inheritParams read_unsigned_int
read_double <- function(con, cursor, n = 1L) {
seek_opus(con, cursor)
read_double <- function(raw, cursor, n = 1L) {
n_double <- n * 8

# seek_opus(con, cursor)
out <- readBin(
con,
raw[cursor:(cursor + n_double)],
what = "double",
n = n,
size = 8L,
endian = "little"
)

raw <- NULL

return(out)
}
6 changes: 0 additions & 6 deletions R/seek_opus.R

This file was deleted.

Binary file not shown.
Loading

0 comments on commit 3941d3c

Please sign in to comment.