In [1]:
Copied!
# Sample "Market By Price" capture used throughout this walkthrough: four
# messages (two for LOBMP.MC, two for TEST.MC), each a header row followed by
# its Summary / FID / MapEntry rows. The last TEST.MC message has no TIMACT_NS
# row.
content = """LOBMP.MC,Market By Price,2025-03-04T16:01:39.983017253Z,+2,Raw,UPDATE,UNSPECIFIED,,,,100,,260,0
,,,,Summary,,,,,,,,,3
,,,,FID,3855,,QUOTIM_MS,50187282,
,,,,FID,4148,,TIMACT_MS,50187282,
,,,,FID,14269,,TIMACT_NS,16:01:39.983017253,
,,,,MapEntry,,UPDATE,,,,,,1100.300000_B,8
,,,,FID,266,,BID_TIME,13:56:27.000000000,
,,,,FID,3427,,ORDER_PRC,1100.3,
,,,,FID,3428,,ORDER_SIDE,1,BID
,,,,FID,3430,,NO_ORD,10,
,,,,FID,4356,,ACC_SIZE,100,
,,,,FID,6527,,LV_TIM_MS,50187282,
,,,,FID,6529,,LV_DATE,2025-04-25,
,,,,FID,14268,,LV_TIM_NS,16:01:39.983017253,
LOBMP.MC,Market By Price,2025-03-04T16:02:39.983017253Z,+2,Raw,UPDATE,UNSPECIFIED,,,,100,,260,0
,,,,Summary,,,,,,,,,3
,,,,FID,3855,,QUOTIM_MS,50187282,
,,,,FID,4148,,TIMACT_MS,50187282,
,,,,FID,14269,,TIMACT_NS,16:01:39.983017253,
,,,,MapEntry,,UPDATE,,,,,,1100.40000_B,8
,,,,FID,266,,BID_TIME,13:56:27.000000000,
,,,,FID,3427,,ORDER_PRC,11100.455,
,,,,FID,3428,,ORDER_SIDE,1,BID
,,,,FID,3430,,NO_ORD,8,
,,,,FID,4356,,ACC_SIZE,307,
,,,,FID,6527,,LV_TIM_MS,50187282,
,,,,FID,6529,,LV_DATE,2025-04-25,
,,,,FID,14268,,LV_TIM_NS,16:01:39.983017253,
TEST.MC,Market By Price,2025-03-04T16:01:39.983017253Z,+2,Raw,UPDATE,UNSPECIFIED,,,,100,,260,0
,,,,Summary,,,,,,,,,3
,,,,FID,3855,,QUOTIM_MS,50187282,
,,,,FID,4148,,TIMACT_MS,50187282,
,,,,FID,14269,,TIMACT_NS,16:01:39.983017253,
,,,,MapEntry,,ADD,,,,,,1100.300000_B,8
,,,,FID,266,,BID_TIME,13:56:27.000000000,
,,,,FID,3427,,ORDER_PRC,1100.3,
,,,,FID,3428,,ORDER_SIDE,1,BID
,,,,FID,3430,,NO_ORD,1,
,,,,FID,4356,,ACC_SIZE,100,
,,,,FID,6527,,LV_TIM_MS,50187282,
,,,,FID,6529,,LV_DATE,2025-04-26,
,,,,FID,14268,,LV_TIM_NS,16:01:39.983017253,
TEST.MC,Market By Price,2025-03-04T16:02:39.983017253Z,+2,Raw,UPDATE,UNSPECIFIED,,,,100,,260,0
,,,,Summary,,,,,,,,,3
,,,,FID,3855,,QUOTIM_MS,50187282,
,,,,FID,4148,,TIMACT_MS,50187282,
,,,,MapEntry,,ADD,,,,,,1100.40000_B,8
,,,,FID,266,,BID_TIME,13:56:27.000000000,
,,,,FID,3427,,ORDER_PRC,11100.455,
,,,,FID,3428,,ORDER_SIDE,1,BID
,,,,FID,3430,,NO_ORD,2,
,,,,FID,4356,,ACC_SIZE,307,
,,,,FID,6527,,LV_TIM_MS,50187282,
,,,,FID,6529,,LV_DATE,2025-04-26,
,,,,FID,14268,,LV_TIM_NS,16:01:39.983017253,
"""
# Write the sample to disk so it can be passed to the lobmp command-line tool.
# The body of the `with` block must be indented, otherwise the cell fails with
# an IndentationError.
with open("testfile_lobmp.csv", "w") as f:
    f.write(content)
# Sample "Market By Price" capture used throughout this walkthrough: four
# messages (two for LOBMP.MC, two for TEST.MC), each a header row followed by
# its Summary / FID / MapEntry rows. The last TEST.MC message has no TIMACT_NS
# row.
content = """LOBMP.MC,Market By Price,2025-03-04T16:01:39.983017253Z,+2,Raw,UPDATE,UNSPECIFIED,,,,100,,260,0
,,,,Summary,,,,,,,,,3
,,,,FID,3855,,QUOTIM_MS,50187282,
,,,,FID,4148,,TIMACT_MS,50187282,
,,,,FID,14269,,TIMACT_NS,16:01:39.983017253,
,,,,MapEntry,,UPDATE,,,,,,1100.300000_B,8
,,,,FID,266,,BID_TIME,13:56:27.000000000,
,,,,FID,3427,,ORDER_PRC,1100.3,
,,,,FID,3428,,ORDER_SIDE,1,BID
,,,,FID,3430,,NO_ORD,10,
,,,,FID,4356,,ACC_SIZE,100,
,,,,FID,6527,,LV_TIM_MS,50187282,
,,,,FID,6529,,LV_DATE,2025-04-25,
,,,,FID,14268,,LV_TIM_NS,16:01:39.983017253,
LOBMP.MC,Market By Price,2025-03-04T16:02:39.983017253Z,+2,Raw,UPDATE,UNSPECIFIED,,,,100,,260,0
,,,,Summary,,,,,,,,,3
,,,,FID,3855,,QUOTIM_MS,50187282,
,,,,FID,4148,,TIMACT_MS,50187282,
,,,,FID,14269,,TIMACT_NS,16:01:39.983017253,
,,,,MapEntry,,UPDATE,,,,,,1100.40000_B,8
,,,,FID,266,,BID_TIME,13:56:27.000000000,
,,,,FID,3427,,ORDER_PRC,11100.455,
,,,,FID,3428,,ORDER_SIDE,1,BID
,,,,FID,3430,,NO_ORD,8,
,,,,FID,4356,,ACC_SIZE,307,
,,,,FID,6527,,LV_TIM_MS,50187282,
,,,,FID,6529,,LV_DATE,2025-04-25,
,,,,FID,14268,,LV_TIM_NS,16:01:39.983017253,
TEST.MC,Market By Price,2025-03-04T16:01:39.983017253Z,+2,Raw,UPDATE,UNSPECIFIED,,,,100,,260,0
,,,,Summary,,,,,,,,,3
,,,,FID,3855,,QUOTIM_MS,50187282,
,,,,FID,4148,,TIMACT_MS,50187282,
,,,,FID,14269,,TIMACT_NS,16:01:39.983017253,
,,,,MapEntry,,ADD,,,,,,1100.300000_B,8
,,,,FID,266,,BID_TIME,13:56:27.000000000,
,,,,FID,3427,,ORDER_PRC,1100.3,
,,,,FID,3428,,ORDER_SIDE,1,BID
,,,,FID,3430,,NO_ORD,1,
,,,,FID,4356,,ACC_SIZE,100,
,,,,FID,6527,,LV_TIM_MS,50187282,
,,,,FID,6529,,LV_DATE,2025-04-26,
,,,,FID,14268,,LV_TIM_NS,16:01:39.983017253,
TEST.MC,Market By Price,2025-03-04T16:02:39.983017253Z,+2,Raw,UPDATE,UNSPECIFIED,,,,100,,260,0
,,,,Summary,,,,,,,,,3
,,,,FID,3855,,QUOTIM_MS,50187282,
,,,,FID,4148,,TIMACT_MS,50187282,
,,,,MapEntry,,ADD,,,,,,1100.40000_B,8
,,,,FID,266,,BID_TIME,13:56:27.000000000,
,,,,FID,3427,,ORDER_PRC,11100.455,
,,,,FID,3428,,ORDER_SIDE,1,BID
,,,,FID,3430,,NO_ORD,2,
,,,,FID,4356,,ACC_SIZE,307,
,,,,FID,6527,,LV_TIM_MS,50187282,
,,,,FID,6529,,LV_DATE,2025-04-26,
,,,,FID,14268,,LV_TIM_NS,16:01:39.983017253,
"""
# Write the sample to disk so it can be passed to the lobmp command-line tool.
# The body of the `with` block must be indented, otherwise the cell fails with
# an IndentationError.
with open("testfile_lobmp.csv", "w") as f:
    f.write(content)
In [2]:
Copied!
! lobmp testfile_lobmp.csv ./output --verbose DEBUG
! lobmp testfile_lobmp.csv ./output --verbose DEBUG
2025-04-26 16:42:36,701 lobmp [MainThread] [DEBUG] Started with timing: Execute run
2025-04-26 16:42:36,702 lobmp [MainThread] [INFO] Counting lines in file and recognising possible FIDs...
2025-04-26 16:42:36,702 lobmp [MainThread] [INFO] Found 55 lines in file
2025-04-26 16:42:36,702 lobmp [MainThread] [DEBUG] Using 8 CPUs
2025-04-26 16:42:36,703 lobmp [MainThread] [INFO] Starting file processing...
2025-04-26 16:42:36,719 lobmp [MainThread] [DEBUG] Parsing thread completed successfully
2025-04-26 16:42:36,719 lobmp [MainThread] [DEBUG] Parsing thread completed successfully
2025-04-26 16:42:36,719 lobmp [MainThread] [DEBUG] Parsing thread completed successfully
2025-04-26 16:42:36,719 lobmp [MainThread] [DEBUG] Parsing thread completed successfully
2025-04-26 16:42:36,719 lobmp [MainThread] [DEBUG] Parsing thread completed successfully
2025-04-26 16:42:36,719 lobmp [MainThread] [DEBUG] Parsing thread completed successfully
2025-04-26 16:42:36,719 lobmp [MainThread] [DEBUG] Parsing thread completed successfully
2025-04-26 16:42:36,719 lobmp [MainThread] [DEBUG] Parsing thread completed successfully
2025-04-26 16:42:36,719 lobmp [MainThread] [DEBUG] Writing queue is empty!
2025-04-26 16:42:36,739 lobmp [MainThread] [DEBUG] Writing thread completed successfully
2025-04-26 16:42:36,739 lobmp [MainThread] [DEBUG] Completed with timing: Execute run (Elapsed: 0.04s)
You should now have the following directory structure:
.
├── output
│   └── testfile_lobmp.parquet
│       └── part-000000.parquet
└── testfile_lobmp.csv
In [3]:
Copied!
import polars as pl
# The output is a *directory* named testfile_lobmp.parquet that holds the part
# file(s) (see the directory tree above), so the path below is that directory,
# not a single file.
path = "./output/testfile_lobmp.parquet/"
# Every column is loaded as a string (see the dtype row of Out[4] below); cast
# the ones you need afterwards.
df = pl.read_parquet(path)
import polars as pl
# The output is a *directory* named testfile_lobmp.parquet that holds the part
# file(s) (see the directory tree above), so the path below is that directory,
# not a single file.
path = "./output/testfile_lobmp.parquet/"
# Every column is loaded as a string (see the dtype row of Out[4] below); cast
# the ones you need afterwards.
df = pl.read_parquet(path)
In [4]:
Copied!
df
df
Out[4]:
shape: (4, 17)
ACC_SIZE | BID_TIME | GMT_OFFSET | LV_DATE | LV_TIM_MS | LV_TIM_NS | MAP_ENTRY_KEY | MAP_ENTRY_TYPE | MARKET_MESSAGE_TYPE | NO_ORD | ORDER_PRC | ORDER_SIDE | QUOTIM_MS | TICKER | TIMACT_MS | TIMACT_NS | TIMESTAMP |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str |
"100" | "13:56:27.000000000" | "+2" | "2025-04-25" | "50187282" | "16:01:39.983017253" | "1100.300000_B" | "UPDATE" | "UPDATE" | "10" | "1100.3" | "BID" | "50187282" | "LOBMP.MC" | "50187282" | "16:01:39.983017253" | "2025-03-04T16:01:39.983017253Z" |
"307" | "13:56:27.000000000" | "+2" | "2025-04-25" | "50187282" | "16:01:39.983017253" | "1100.40000_B" | "UPDATE" | "UPDATE" | "8" | "11100.455" | "BID" | "50187282" | "LOBMP.MC" | "50187282" | "16:01:39.983017253" | "2025-03-04T16:02:39.983017253Z" |
"100" | "13:56:27.000000000" | "+2" | "2025-04-26" | "50187282" | "16:01:39.983017253" | "1100.300000_B" | "ADD" | "UPDATE" | "1" | "1100.3" | "BID" | "50187282" | "TEST.MC" | "50187282" | "16:01:39.983017253" | "2025-03-04T16:01:39.983017253Z" |
"307" | "13:56:27.000000000" | "+2" | "2025-04-26" | "50187282" | "16:01:39.983017253" | "1100.40000_B" | "ADD" | "UPDATE" | "2" | "11100.455" | "BID" | "50187282" | "TEST.MC" | "50187282" | "" | "2025-03-04T16:02:39.983017253Z" |
Finally, you may want to filter the rows and convert some of the string columns to more useful types:
In [5]:
Copied!
# Bounds of the half-open window [16:01:00, 16:02:00) on 2025-03-04 (UTC).
window_start = pl.lit("2025-03-04T16:01:00.000000000Z").str.to_datetime(time_unit="ns", time_zone="UTC")
window_end = pl.lit("2025-03-04T16:02:00.000000000Z").str.to_datetime(time_unit="ns", time_zone="UTC")

# Keep only the TEST.MC rows, then replace the string columns of interest with
# typed ones (timestamp, decimal price, integer counts/sizes).
test_mc = df.filter(pl.col("TICKER") == "TEST.MC")
typed = test_mc.with_columns(
    pl.col("TIMESTAMP").str.strptime(pl.Datetime("ns")),
    pl.col("ORDER_PRC").cast(pl.Decimal, strict=False),
    pl.col("NO_ORD").cast(pl.Int64, strict=False),
    pl.col("ACC_SIZE").cast(pl.Int64, strict=False),
)

# Get book changes between 2025-03-04T16:01:00 and 2025-03-04T16:02:00
# (start inclusive, end exclusive). Left as a bare expression so the notebook
# displays the resulting frame.
typed.filter(pl.col("TIMESTAMP").is_between(window_start, window_end, closed="left"))
# Bounds of the half-open window [16:01:00, 16:02:00) on 2025-03-04 (UTC).
window_start = pl.lit("2025-03-04T16:01:00.000000000Z").str.to_datetime(time_unit="ns", time_zone="UTC")
window_end = pl.lit("2025-03-04T16:02:00.000000000Z").str.to_datetime(time_unit="ns", time_zone="UTC")

# Keep only the TEST.MC rows, then replace the string columns of interest with
# typed ones (timestamp, decimal price, integer counts/sizes).
test_mc = df.filter(pl.col("TICKER") == "TEST.MC")
typed = test_mc.with_columns(
    pl.col("TIMESTAMP").str.strptime(pl.Datetime("ns")),
    pl.col("ORDER_PRC").cast(pl.Decimal, strict=False),
    pl.col("NO_ORD").cast(pl.Int64, strict=False),
    pl.col("ACC_SIZE").cast(pl.Int64, strict=False),
)

# Get book changes between 2025-03-04T16:01:00 and 2025-03-04T16:02:00
# (start inclusive, end exclusive). Left as a bare expression so the notebook
# displays the resulting frame.
typed.filter(pl.col("TIMESTAMP").is_between(window_start, window_end, closed="left"))
Out[5]:
shape: (1, 17)
ACC_SIZE | BID_TIME | GMT_OFFSET | LV_DATE | LV_TIM_MS | LV_TIM_NS | MAP_ENTRY_KEY | MAP_ENTRY_TYPE | MARKET_MESSAGE_TYPE | NO_ORD | ORDER_PRC | ORDER_SIDE | QUOTIM_MS | TICKER | TIMACT_MS | TIMACT_NS | TIMESTAMP |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
i64 | str | str | str | str | str | str | str | str | i64 | decimal[*,3] | str | str | str | str | str | datetime[ns, UTC] |
100 | "13:56:27.000000000" | "+2" | "2025-04-26" | "50187282" | "16:01:39.983017253" | "1100.300000_B" | "ADD" | "UPDATE" | 1 | 1100.300 | "BID" | "50187282" | "TEST.MC" | "50187282" | "16:01:39.983017253" | 2025-03-04 16:01:39.983017253 UTC |