From cd27bef8d1b6af33d6b9c4d3357482553f042d2a Mon Sep 17 00:00:00 2001
From: Joshua Gray <61458311+jgray-19@users.noreply.github.com>
Date: Wed, 8 Jan 2025 10:52:04 +0100
Subject: [PATCH] Convert MAD-NG track to TBT data (#21)

Adds a module provides functions to read and write ``MAD-NG`` turn-by-turn measurement files. These files
are in the **TFS** format.
---------

Co-authored-by: jgray-19 <jgray-19@users.noreply.github.com>
---
 doc/readers/index.rst                   |   4 +
 pyproject.toml                          |   4 +-
 tests/inputs/madng/fodo_track.mad       |  42 ++++++
 tests/inputs/madng/fodo_track.tfs       |  35 +++++
 tests/inputs/madng/fodo_track_error.tfs |  34 +++++
 tests/test_madng.py                     |  86 ++++++++++++
 turn_by_turn/__init__.py                |   2 +-
 turn_by_turn/io.py                      |   7 +-
 turn_by_turn/madng.py                   | 168 ++++++++++++++++++++++++
 turn_by_turn/sps.py                     |   2 +-
 10 files changed, 377 insertions(+), 7 deletions(-)
 create mode 100644 tests/inputs/madng/fodo_track.mad
 create mode 100644 tests/inputs/madng/fodo_track.tfs
 create mode 100644 tests/inputs/madng/fodo_track_error.tfs
 create mode 100644 tests/test_madng.py
 create mode 100644 turn_by_turn/madng.py

diff --git a/doc/readers/index.rst b/doc/readers/index.rst
index 03a34a9..36d0a95 100644
--- a/doc/readers/index.rst
+++ b/doc/readers/index.rst
@@ -31,3 +31,7 @@
 .. automodule:: turn_by_turn.trackone
     :members:
     :noindex:
+
+.. automodule:: turn_by_turn.madng
+    :members:
+    :noindex:
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 349ff7d..391385f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ authors = [
 ]
 license = "MIT"
 dynamic = ["version"]
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 
 classifiers = [
     "Development Status :: 5 - Production/Stable",
@@ -33,7 +33,6 @@ classifiers = [
     "Natural Language :: English",
     "Operating System :: OS Independent",
     "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
@@ -49,6 +48,7 @@ dependencies = [
     "pandas >= 2.1",
     "sdds >= 0.4",
     "h5py >= 2.9",
+    "tfs-pandas >= 4.0.0", # for madng (could be an optional dependency)
 ]
 
 [project.optional-dependencies]
diff --git a/tests/inputs/madng/fodo_track.mad b/tests/inputs/madng/fodo_track.mad
new file mode 100644
index 0000000..a82a19c
--- /dev/null
+++ b/tests/inputs/madng/fodo_track.mad
@@ -0,0 +1,42 @@
+MADX:open_env()
+circum = 60
+lcell = 20
+f =\ lcell/sin(pi/4)/4
+k =\ 1/f
+qf = multipole 'QF' { knl := {0,k} }
+qd = multipole 'QD' { knl := {0, -k} }
+bpm = monitor 'BPM' {  }
+
+seq = sequence 'SEQ' { refer = centre, l = circum,
+bpm 'BPM1' { at = 0 },
+qf 'QF' { at = 0 },
+qd 'QD' { at = 0.5 *lcell },
+qf 'QF' { at = 1.0 *lcell },
+bpm 'BPM3' { at = 1.5 *lcell },
+qd 'QD' { at = 1.5 *lcell },
+qf 'QF' { at = 2.0 *lcell },
+qd 'QD' { at = 2.5 *lcell },
+bpm 'BPM2' { at = 3 *lcell },
+}
+MADX:close_env()
+
+local seq in MADX
+
+local beam, track in MAD
+seq.beam = beam
+
+local observed in MAD.element.flags
+seq:deselect(observed)
+seq:select(observed, {pattern="BPM"})
+local part1 = {x = 1e-3, px = 0, y =-1e-3, py = 0, t = 0, pt = 0}
+local part2 = {x =-1e-3, px = 0, y = 1e-3, py = 0, t = 0, pt = 0}
+
+local mtbl = track {
+  sequence = seq,
+  X0 = {part1, part2},
+  nturn = 3,
+  }
+mtbl:write("fodo_track.tfs", {"name", "x", "y", "turn", "id"})
+
+mtbl:remove(#mtbl) -- remove the final row to produce an incomplete file
+mtbl:write("fodo_track_error.tfs", {"name", "x", "y", "turn", "id"})
\ No newline at end of file
diff --git a/tests/inputs/madng/fodo_track.tfs b/tests/inputs/madng/fodo_track.tfs
new file mode 100644
index 0000000..e0aa4f8
--- /dev/null
+++ b/tests/inputs/madng/fodo_track.tfs
@@ -0,0 +1,35 @@
+@ name               %03s     "SEQ"
+@ type               %05s     "track"
+@ title              %03s     "SEQ"
+@ origin             %18s     "MAD 1.0.0 Linux 64"
+@ date               %08s     "07/01/25"
+@ time               %08s     "15:38:30"
+@ refcol             %04s     "name"
+@ direction          %le       1
+@ observe            %le       1
+@ implicit           %b       false
+@ misalign           %b       false
+@ radiate            %b       false
+@ energy             %le       1
+@ deltap             %le       0
+@ lost               %le       0
+* name               x                  y                  turn               id
+$ %s                 %le                %le                %le                %le
+  "BPM1"              0.001             -0.001              1                  1
+  "BPM1"             -0.001              0.001              1                  2
+  "BPM3"             -0.0009999999503    0.00100000029      1                  1
+  "BPM3"              0.0009999999503   -0.00100000029      1                  2
+  "BPM2"              0.002414213831     0.0004142133507    1                  1
+  "BPM2"             -0.002414213831    -0.0004142133507    1                  2
+  "BPM1"              0.002414213831     0.0004142133507    2                  1
+  "BPM1"             -0.002414213831    -0.0004142133507    2                  2
+  "BPM3"             -0.0004142138307   -0.002414213351     2                  1
+  "BPM3"              0.0004142138307    0.002414213351     2                  2
+  "BPM2"             -0.0009999991309    0.001000000149     2                  1
+  "BPM2"              0.0009999991309   -0.001000000149     2                  2
+  "BPM1"             -0.0009999991309    0.001000000149     3                  1
+  "BPM1"              0.0009999991309   -0.001000000149     3                  2
+  "BPM3"              0.0009999998012   -0.001000001159     3                  1
+  "BPM3"             -0.0009999998012    0.001000001159     3                  2
+  "BPM2"             -0.002414214191    -0.0004142129907    3                  1
+  "BPM2"              0.002414214191     0.0004142129907    3                  2
diff --git a/tests/inputs/madng/fodo_track_error.tfs b/tests/inputs/madng/fodo_track_error.tfs
new file mode 100644
index 0000000..297ce64
--- /dev/null
+++ b/tests/inputs/madng/fodo_track_error.tfs
@@ -0,0 +1,34 @@
+@ name               %03s     "SEQ"
+@ type               %05s     "track"
+@ title              %03s     "SEQ"
+@ origin             %18s     "MAD 1.0.0 Linux 64"
+@ date               %08s     "07/01/25"
+@ time               %08s     "15:38:30"
+@ refcol             %04s     "name"
+@ direction          %le       1
+@ observe            %le       1
+@ implicit           %b       false
+@ misalign           %b       false
+@ radiate            %b       false
+@ energy             %le       1
+@ deltap             %le       0
+@ lost               %le       0
+* name               x                  y                  turn               id
+$ %s                 %le                %le                %le                %le
+  "BPM1"              0.001             -0.001              1                  1
+  "BPM1"             -0.001              0.001              1                  2
+  "BPM3"             -0.0009999999503    0.00100000029      1                  1
+  "BPM3"              0.0009999999503   -0.00100000029      1                  2
+  "BPM2"              0.002414213831     0.0004142133507    1                  1
+  "BPM2"             -0.002414213831    -0.0004142133507    1                  2
+  "BPM1"              0.002414213831     0.0004142133507    2                  1
+  "BPM1"             -0.002414213831    -0.0004142133507    2                  2
+  "BPM3"             -0.0004142138307   -0.002414213351     2                  1
+  "BPM3"              0.0004142138307    0.002414213351     2                  2
+  "BPM2"             -0.0009999991309    0.001000000149     2                  1
+  "BPM2"              0.0009999991309   -0.001000000149     2                  2
+  "BPM1"             -0.0009999991309    0.001000000149     3                  1
+  "BPM1"              0.0009999991309   -0.001000000149     3                  2
+  "BPM3"              0.0009999998012   -0.001000001159     3                  1
+  "BPM3"             -0.0009999998012    0.001000001159     3                  2
+  "BPM2"             -0.002414214191    -0.0004142129907    3                  1
diff --git a/tests/test_madng.py b/tests/test_madng.py
new file mode 100644
index 0000000..8ef0390
--- /dev/null
+++ b/tests/test_madng.py
@@ -0,0 +1,86 @@
+
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from tests.test_lhc_and_general import INPUTS_DIR, compare_tbt
+from turn_by_turn import madng, read_tbt, write_tbt
+from turn_by_turn.structures import TbtData, TransverseData
+
+
+def test_read_ng(_ng_file):
+    original = _original_simulation_data()
+    
+    # Check directly from the module
+    new = madng.read_tbt(_ng_file)
+    compare_tbt(original, new, no_binary=True)
+    
+    # Check from the main function
+    new = read_tbt(_ng_file, datatype="madng")
+    compare_tbt(original, new, no_binary=True)
+
+def test_write_ng(_ng_file, tmp_path):
+    original_tbt = _original_simulation_data()
+    
+    # Write the data
+    from_tbt = tmp_path / "from_tbt.tfs"
+    madng.write_tbt(from_tbt, original_tbt)
+    
+    # Read the written data
+    new_tbt = madng.read_tbt(from_tbt)
+    compare_tbt(original_tbt, new_tbt, no_binary=True)
+
+    # Check from the main function
+    original_tbt = read_tbt(_ng_file, datatype="madng")
+    write_tbt(from_tbt, original_tbt, datatype="madng")
+
+    new_tbt = read_tbt(from_tbt, datatype="madng")
+    compare_tbt(original_tbt, new_tbt, no_binary=True)
+    assert original_tbt.date == new_tbt.date
+    
+def test_error_ng(_error_file):
+    with pytest.raises(ValueError):
+        read_tbt(_error_file, datatype="madng")
+
+# ---- Helpers ---- #
+def _original_simulation_data() -> TbtData:
+    # Create a TbTData object with the original data
+    names = np.array(["BPM1", "BPM3", "BPM2"])
+    bpm1_p1_x = np.array([ 1e-3, 0.002414213831,-0.0009999991309])
+    bpm1_p1_y = np.array([-1e-3, 0.0004142133507, 0.001000000149])
+    bpm1_p2_x = np.array([-1e-3,-0.002414213831, 0.0009999991309])
+    bpm1_p2_y = np.array([ 1e-3,-0.0004142133507,-0.001000000149])
+
+    bpm2_p1_x = np.array([-0.0009999999503,-0.0004142138307, 0.0009999998012])
+    bpm2_p1_y = np.array([ 0.00100000029,-0.002414213351,-0.001000001159])
+    bpm2_p2_x = np.array([ 0.0009999999503, 0.0004142138307,-0.0009999998012])
+    bpm2_p2_y = np.array([-0.00100000029, 0.002414213351, 0.001000001159])
+
+    bpm3_p1_x = np.array([ 0.002414213831,-0.0009999991309,-0.002414214191])
+    bpm3_p1_y = np.array([ 0.0004142133507, 0.001000000149,-0.0004142129907])
+    bpm3_p2_x = np.array([-0.002414213831, 0.0009999991309, 0.002414214191])
+    bpm3_p2_y = np.array([-0.0004142133507,-0.001000000149, 0.0004142129907])
+
+    matrix = [
+        TransverseData(  # first particle
+            X=pd.DataFrame(index=names, data=[bpm1_p1_x, bpm2_p1_x, bpm3_p1_x]),
+            Y=pd.DataFrame(index=names, data=[bpm1_p1_y, bpm2_p1_y, bpm3_p1_y]),
+        ),
+        TransverseData(  # second particle
+            X=pd.DataFrame(index=names, data=[bpm1_p2_x, bpm2_p2_x, bpm3_p2_x]),
+            Y=pd.DataFrame(index=names, data=[bpm1_p2_y, bpm2_p2_y, bpm3_p2_y]),
+        ),
+    ]
+    return TbtData(matrices=matrix, bunch_ids=[1, 2], nturns=3)
+
+
+# ---- Fixtures ---- #
+@pytest.fixture
+def _ng_file(tmp_path):
+    return INPUTS_DIR / "madng" / "fodo_track.tfs"
+
+@pytest.fixture
+def _error_file(tmp_path):
+    return INPUTS_DIR / "madng" / "fodo_track_error.tfs"
\ No newline at end of file
diff --git a/turn_by_turn/__init__.py b/turn_by_turn/__init__.py
index 5bd0dbf..54835f5 100644
--- a/turn_by_turn/__init__.py
+++ b/turn_by_turn/__init__.py
@@ -5,7 +5,7 @@
 __title__ = "turn_by_turn"
 __description__ = "Read and write turn-by-turn measurement files from different particle accelerator formats."
 __url__ = "https://github.com/pylhc/turn_by_turn"
-__version__ = "0.7.2"
+__version__ = "0.8.0"
 __author__ = "pylhc"
 __author_email__ = "pylhc@github.com"
 __license__ = "MIT"
diff --git a/turn_by_turn/io.py b/turn_by_turn/io.py
index 6cbfb51..4e8a318 100644
--- a/turn_by_turn/io.py
+++ b/turn_by_turn/io.py
@@ -2,7 +2,7 @@
 IO
 --
 
-This module contains high-level I/O functions to read and write tur-by-turn data objects in different
+This module contains high-level I/O functions to read and write turn-by-turn data objects in different
 formats. While data can be loaded from the formats of different machines / codes, each format getting its
 own reader module, writing functionality is at the moment always done in the ``LHC``'s **SDDS** format.
 """
@@ -10,7 +10,7 @@
 from pathlib import Path
 from typing import Union, Any
 
-from turn_by_turn import ascii, doros, esrf, iota, lhc, ptc, sps, trackone
+from turn_by_turn import ascii, doros, esrf, iota, lhc, ptc, sps, trackone, madng
 from turn_by_turn.ascii import write_ascii
 from turn_by_turn.errors import DataTypeError
 from turn_by_turn.structures import TbtData
@@ -29,8 +29,9 @@
     ptc=ptc,
     trackone=trackone,
     ascii=ascii,
+    madng=madng,
 )
-WRITERS = ("lhc", "sps", "doros", "doros_positions", "doros_oscillations", "ascii")  # implemented writers
+WRITERS = ("lhc", "sps", "doros", "doros_positions", "doros_oscillations", "ascii", "madng")  # implemented writers
 
 write_lhc_ascii = write_ascii  # Backwards compatibility <0.4
 
diff --git a/turn_by_turn/madng.py b/turn_by_turn/madng.py
new file mode 100644
index 0000000..b4e1c41
--- /dev/null
+++ b/turn_by_turn/madng.py
@@ -0,0 +1,168 @@
+"""
+MAD-NG
+------
+
+This module provides functions to read and write ``MAD-NG`` turn-by-turn measurement files. These files
+are in the **TFS** format.
+
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+import logging
+from pathlib import Path
+
+import pandas as pd
+import tfs
+
+from turn_by_turn.structures import TbtData, TransverseData
+
+LOGGER = logging.getLogger()
+
+# Define the column names in the TFS file
+NAME = "name"
+ELEMENT_INDEX = "eidx"
+TURN = "turn"
+PARTICLE_ID = "id"
+
+# Define the header names in the TFS file
+HNAME = "name"
+ORIGIN = "origin"
+DATE = "date"
+TIME = "time"
+REFCOL = "refcol"
+
+
+def read_tbt(file_path: str | Path) -> TbtData:
+    """
+    Reads turn-by-turn data from the ``MAD-NG`` **TFS** format file.
+
+    Args:
+        file_path (str | Path): path to the turn-by-turn measurement file.
+
+    Returns:
+        A ``TbTData`` object with the loaded data.
+    """
+    LOGGER.debug("Starting to read TBT data from dataframe")
+    df = tfs.read(file_path)
+
+    # Get the date and time from the headers (return None if not found)
+    date_str = df.headers.get(DATE)
+    time_str = df.headers.get(TIME)
+    
+    # Combine the date and time into a datetime object
+    date = None
+    if date_str and time_str:
+        date = datetime.strptime(f"{date_str} {time_str}", "%d/%m/%y %H:%M:%S")
+    elif date_str:
+        date = datetime.strptime(date_str, "%d/%m/%y")
+
+    nturns = int(df.iloc[-1].loc[TURN])
+    npart = int(df.iloc[-1].loc[PARTICLE_ID])
+    LOGGER.info(f"Number of turns: {nturns}, Number of particles: {npart}")
+
+    # Get the names of the observed points (BPMs) from the first particle's first turn
+    df = df.set_index([PARTICLE_ID, TURN]).sort_index()
+    observe_points = df.loc[(1, 1)][NAME].to_numpy()
+    num_observables = len(observe_points)
+
+    # Check if the number of observed points is consistent for all particles/turns
+    if len(df[NAME]) / nturns / npart != num_observables:
+        raise ValueError(
+            "The number of BPMs (or observed points) is not consistent for all particles/turns. Simulation may have lost particles."
+        )
+
+    matrices = []
+    bunch_ids = range(1, npart + 1)  # Particle IDs start from 1 (not 0)
+
+    for particle_id in bunch_ids:
+        LOGGER.info(f"Processing particle ID: {particle_id}")
+
+        # Filter the dataframe for the current particle
+        df_particle = df.loc[particle_id]
+
+        # Create a dictionary of the TransverseData fields
+        tracking_data_dict = {
+            plane: pd.DataFrame(
+                index=observe_points,
+                data=df_particle[plane.lower()]  # MAD-NG uses lower case for the planes
+                .to_numpy()
+                .reshape(num_observables, nturns, order="F"),
+                # ^ Number of Observables x Number of turns, Fortran order (So that the observables are the rows)
+            )
+            for plane in TransverseData.fieldnames()  # X, Y
+        }
+
+        # Append the TransverseData object to the matrices list
+        # We don't use TrackingData, as MAD-NG does not provide energy
+        matrices.append(TransverseData(**tracking_data_dict))
+
+    LOGGER.debug("Finished reading TBT data")
+    return TbtData(matrices=matrices, bunch_ids=list(bunch_ids), nturns=nturns, date=date)
+
+
+def write_tbt(output_path: str | Path, tbt_data: TbtData) -> None:
+    """
+    Writes turn-by-turn data to a TFS file for MAD-NG.
+
+    Args:
+        tbt_data (TbtData): Turn-by-turn data to write.
+        file_path (str | Path): Target file path.
+    """
+    planes = [plane.lower() for plane in TransverseData.fieldnames()]  # x, y
+    plane_dfs = {plane: [] for plane in planes}
+
+    for particle_id, transverse_data in zip(tbt_data.bunch_ids, tbt_data.matrices):
+        for plane in planes:
+            # Create a dataframe for the current plane and particle
+            particle_df: pd.DataFrame = transverse_data[plane.upper()].copy()
+
+            # Create the name column from the index
+            particle_df.index.name = NAME
+            particle_df = particle_df.reset_index()
+
+            # Add the element index column (to be used for merging)
+            particle_df[ELEMENT_INDEX] = particle_df.index
+
+            # Melt the dataframe to have columns: name, element index, turn, plane
+            particle_df = pd.melt(
+                particle_df,
+                id_vars=[NAME, ELEMENT_INDEX],
+                var_name=TURN,
+                value_name=plane,
+            )
+
+            # Add the particle ID column
+            particle_df[PARTICLE_ID] = particle_id
+
+            # Convert the turn column to integer and increment by 1 (MAD-NG uses 1-based indexing)
+            particle_df[TURN] = particle_df[TURN].astype(int) + 1
+
+            # Append the dataframe to the list
+            plane_dfs[plane].append(particle_df)
+
+    # Merge the dataframes on name, turn, particle ID and element index for both planes
+    df_x = pd.concat(plane_dfs[planes[0]])
+    df_y = pd.concat(plane_dfs[planes[1]])
+    merged_df = pd.merge(df_x, df_y, on=[NAME, TURN, PARTICLE_ID, ELEMENT_INDEX])
+    merged_df = merged_df.set_index([NAME])
+
+    # Sort the dataframe by turn, element index and particle ID (so the format is consistent with MAD-NG)
+    merged_df = merged_df.sort_values(by=[TURN, ELEMENT_INDEX, PARTICLE_ID])
+
+    # Drop the element index column (this is not the real element index, but a temporary one for merging)
+    merged_df = merged_df.drop(columns=[ELEMENT_INDEX])
+
+    # Set the columns to x, y, turn, id, for consistency.
+    merged_df = merged_df[[planes[0], planes[1], TURN, PARTICLE_ID]]
+
+    # Write the dataframe to a TFS file
+    headers = {
+        HNAME: "TbtData",
+        ORIGIN: "Python",
+        DATE: tbt_data.date.strftime("%d/%m/%y"),
+        TIME: tbt_data.date.strftime("%H:%M:%S"),
+        REFCOL: NAME,
+    }
+    tfs.write(output_path, merged_df, headers_dict=headers, save_index=NAME)
diff --git a/turn_by_turn/sps.py b/turn_by_turn/sps.py
index 4e5a033..2a97fe2 100644
--- a/turn_by_turn/sps.py
+++ b/turn_by_turn/sps.py
@@ -62,7 +62,7 @@ def read_tbt(file_path: Union[str, Path], remove_trailing_bpm_plane: bool = True
     tbt_data_y = [sdds_file.values[bpm] for bpm in bpm_names_y]
     
     if remove_trailing_bpm_plane:
-        pattern = re.compile("\.[HV]$", flags=re.IGNORECASE)
+        pattern = re.compile(r"\.[HV]$", flags=re.IGNORECASE)
         bpm_names_x  = [pattern.sub("", bpm) for bpm in bpm_names_x]
         bpm_names_y  = [pattern.sub("", bpm) for bpm in bpm_names_y]