From 8244f9ec5133eaa7e9d4181e342cb7a3d5a4ea45 Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 20 Oct 2018 19:25:20 +0200 Subject: [PATCH] We can now decode IBM MFM disks all the way to an image, although with no CRC checking as yet. --- lib/decoders/decoders.cc | 89 ++++++++++++++++++++++++++++++++++++ lib/decoders/decoders.h | 35 +++++++++++++++ lib/decoders/ibm.cc | 59 ++++++++++++++++++++++++ lib/decoders/mfm.cc | 97 ++++++++++++++++++++++++++++++++++++++++ lib/flags.h | 12 +++++ lib/fluxmap.h | 2 + lib/globals.cc | 4 +- lib/image.cc | 40 +++++++++++++++++ lib/image.h | 33 ++++++++++++++ lib/reader.cc | 9 ++-- meson.build | 30 ++++++++++--- src/fe-readibm.cc | 35 ++++++++++++++- 12 files changed, 430 insertions(+), 15 deletions(-) create mode 100644 lib/decoders/decoders.cc create mode 100644 lib/decoders/decoders.h create mode 100644 lib/decoders/ibm.cc create mode 100644 lib/decoders/mfm.cc create mode 100644 lib/image.cc create mode 100644 lib/image.h diff --git a/lib/decoders/decoders.cc b/lib/decoders/decoders.cc new file mode 100644 index 00000000..5f80f33d --- /dev/null +++ b/lib/decoders/decoders.cc @@ -0,0 +1,89 @@ +#include "globals.h" +#include "flags.h" +#include "fluxmap.h" +#include "protocol.h" + +static IntFlag clockDetectionNoiseFloor( + { "--clock-detection-noise-floor" }, + "Noise floor used for clock detection in flux.", + 50); + +static DoubleFlag clockDecodeThreshold( + { "--clock-decode-threshold" }, + "Pulses below this fraction of a clock tick are considered spurious and ignored.", + 0.80); + +/* + * Tries to guess the clock by finding the smallest common interval. + * Returns nanoseconds. 
+ */ +nanoseconds_t Fluxmap::guessClock() const +{ + uint32_t buckets[256] = {}; + for (uint8_t interval : _intervals) + buckets[interval]++; + + int peaklo = 0; + while (peaklo < 256) + { + if (buckets[peaklo] > 100) + break; + peaklo++; + } + + uint32_t peakmaxindex = peaklo; + uint32_t peakmaxvalue = buckets[peakmaxindex]; + uint32_t peakhi = peaklo; + while (peakhi < 256) + { + uint32_t v = buckets[peakhi]; + if (buckets[peakhi] < (uint32_t)clockDetectionNoiseFloor) + break; + if (v > peakmaxvalue) + { + peakmaxindex = peakhi; + peakmaxvalue = v; + } + peakhi++; + } + + /* + * Okay, peakmaxindex should now be a good candidate for the (or a) clock. + * How this maps onto the actual clock rate depends on the encoding. + */ + + return peakmaxindex * NS_PER_TICK; +} + +/* Decodes a fluxmap into a nice aligned array of bits. */ +std::vector decodeFluxmapToBits(const Fluxmap& fluxmap, nanoseconds_t clockPeriod) +{ + int pulses = fluxmap.duration() / clockPeriod; + nanoseconds_t lowerThreshold = clockPeriod * clockDecodeThreshold; + + std::vector bitmap(pulses); + unsigned count = 0; + int cursor = 0; + nanoseconds_t timestamp = 0; + for (;;) + { + while (timestamp < lowerThreshold) + { + if (cursor >= fluxmap.bytes()) + goto abort; + uint8_t interval = fluxmap[cursor++]; + timestamp += interval * NS_PER_TICK; + } + + int clocks = (timestamp + clockPeriod/2) / clockPeriod; + count += clocks; + if (count >= bitmap.size()) + goto abort; + bitmap[count] = true; + timestamp = 0; + } +abort: + + return bitmap; +} + diff --git a/lib/decoders/decoders.h b/lib/decoders/decoders.h new file mode 100644 index 00000000..d24b3c98 --- /dev/null +++ b/lib/decoders/decoders.h @@ -0,0 +1,35 @@ +#ifndef DECODERS_H +#define DECODERS_H + +/* IBM format (i.e. ordinary PC floppies). 
*/ + +#define IBM_IAM 0xFC /* start-of-track record */ +#define IBM_IAM_LEN 4 +#define IBM_IDAM 0xFE /* sector header */ +#define IBM_IDAM_LEN 10 +#define IBM_DAM1 0xF8 /* sector data (type 1) */ +#define IBM_DAM2 0xFB /* sector data (type 2) */ +#define IBM_DAM_LEN 6 /* plus user data */ +/* Length of a DAM record is determined by the previous sector header. */ + +struct IbmIdam +{ + uint8_t marker[3]; + uint8_t id; + uint8_t cylinder; + uint8_t side; + uint8_t sector; + uint8_t sectorSize; + uint16_t crcBE; +}; + +class Sector; +class Fluxmap; + +extern std::vector decodeFluxmapToBits(const Fluxmap& fluxmap, nanoseconds_t clock_period); + +extern std::vector> decodeBitsToRecordsMfm(const std::vector& bitmap); + +extern std::vector> decodeIbmRecordsToSectors(const std::vector>& records); + +#endif diff --git a/lib/decoders/ibm.cc b/lib/decoders/ibm.cc new file mode 100644 index 00000000..9cb7192d --- /dev/null +++ b/lib/decoders/ibm.cc @@ -0,0 +1,59 @@ +#include "globals.h" +#include "decoders.h" +#include "image.h" +#include + +static_assert(std::is_trivially_copyable::value); + +std::vector> decodeIbmRecordsToSectors(const std::vector>& records) +{ + bool idamValid = false; + IbmIdam idam; + std::vector> sectors; + + for (auto& record : records) + { + switch (record[3]) + { + case IBM_IAM: + /* Track header. Ignore. */ + break; + + case IBM_IDAM: + { + if (record.size() < sizeof(idam)) + goto garbage; + memcpy(&idam, &record[0], sizeof(idam)); + idamValid = true; + /* TODO: check CRC! */ + break; + } + + case IBM_DAM1: + case IBM_DAM2: + { + if (!idamValid) + goto garbage; + + unsigned size = 1 << (idam.sectorSize + 7); + if ((record.size()-IBM_DAM_LEN) < size) + goto garbage; + /* TODO: check CRC! 
*/ + + std::vector sectordata(size); + memcpy(&sectordata[0], &record[4], size); + + auto sector = std::unique_ptr(new Sector(idam.cylinder, idam.side, idam.sector-1, sectordata)); + sectors.push_back(std::move(sector)); + idamValid = false; + break; + } + + default: + garbage: + Error() << "garbage record on disk (this diagnostic needs improving)"; + } + } + + return sectors; +} diff --git a/lib/decoders/mfm.cc b/lib/decoders/mfm.cc new file mode 100644 index 00000000..31d7e569 --- /dev/null +++ b/lib/decoders/mfm.cc @@ -0,0 +1,97 @@ +#include "globals.h" +#include "fluxmap.h" +#include "protocol.h" +#include +#include + +#define CLOCK_LOCK_BOOST 6 /* arbitrary */ +#define CLOCK_LOCK_DECAY 1 /* arbitrary */ +#define CLOCK_DETECTOR_AMPLITUDE_THRESHOLD 60 /* arbitrary */ +#define CLOCK_ERROR_BOUNDS 0.25 + +static unsigned cursor; +static std::vector outputbuffer; +static uint8_t outputfifo = 0; +static int bitcount = 0; +static bool phase = false; + +static void write_bit(bool bit) +{ + outputfifo = (outputfifo << 1) | bit; + bitcount++; + if (bitcount == 8) + { + outputbuffer.push_back(outputfifo); + bitcount = 0; + } +} + +std::vector> decodeBitsToRecordsMfm(const std::vector& bits) +{ + std::vector> records; + + cursor = 0; + uint64_t inputfifo = 0; + bool reading = false; + + while (cursor < bits.size()) + { + bool bit = bits[cursor++]; + inputfifo = (inputfifo << 1) | bit; + + /* + * The IAM record, which is the first one on the disk (and is optional), uses + * a distorted 0xC2 0xC2 0xC2 marker to identify it. Unfortunately, if this is + * shifted out of phase, it becomes a legal encoding, so if we're looking at + * real data we can't honour this. + * + * 0xC2 is: + * data: 1 1 0 0 0 0 1 0 + * mfm: 01 01 00 10 10 10 01 00 = 0x5254 + * special: 01 01 00 10 00 10 01 00 = 0x5224 + * ^^^^ + * shifted: 10 10 01 00 01 00 10 0. = legal, and might happen in real data + * + * Therefore, when we've read the marker, the input fifo will contain + * 0xXXXX522452245224. 
+ * + * All other records use 0xA1 as a marker: + * + * 0xA1 is: + * data: 1 0 1 0 0 0 0 1 + * mfm: 01 00 01 00 10 10 10 01 = 0x44A9 + * special: 01 00 01 00 10 00 10 01 = 0x4489 + * ^^^^^ + * shifted: 10 00 10 01 00 01 00 1 + * + * When this is shifted out of phase, we get an illegal encoding (you + * can't do 10 00). So, if we ever see 0x448944894489 in the input + * fifo, we know we've landed at the beginning of a new record. + */ + + uint64_t masked = inputfifo & 0xFFFFFFFFFFFFLL; + if ((!reading && (masked == 0x522452245224LL)) || (masked == 0x448944894489LL)) + { + if (reading) + records.push_back(outputbuffer); + + outputbuffer.resize(3); + std::fill(outputbuffer.begin(), outputbuffer.begin()+3, reading ? 0xA1 : 0xC2); + + reading = true; + bitcount = 0; + phase = 0; + } + else if (reading) + { + if (phase) + write_bit(bit); + phase = !phase; + } + } + + if (reading) + records.push_back(outputbuffer); + + return records; +} \ No newline at end of file diff --git a/lib/flags.h b/lib/flags.h index 25ecffe6..262e4759 100644 --- a/lib/flags.h +++ b/lib/flags.h @@ -101,4 +101,16 @@ public: void set(const std::string value) { _value = std::stoi(value); } }; +class DoubleFlag : public ValueFlag +{ +public: + DoubleFlag(const std::vector& names, const std::string helptext, + double defaultValue = 1.0): + ValueFlag(names, helptext, defaultValue) + {} + + const std::string defaultValue() const { return std::to_string(_defaultValue); } + void set(const std::string value) { _value = std::stod(value); } +}; + #endif diff --git a/lib/fluxmap.h b/lib/fluxmap.h index 9b3374b8..783aa76b 100644 --- a/lib/fluxmap.h +++ b/lib/fluxmap.h @@ -16,6 +16,8 @@ public: Fluxmap& appendIntervals(std::vector& intervals); Fluxmap& appendIntervals(const uint8_t* ptr, size_t len); + nanoseconds_t guessClock() const; + private: nanoseconds_t _duration = 0; int _ticks = 0; diff --git a/lib/globals.cc b/lib/globals.cc index 616adb28..db40a3e1 100644 --- a/lib/globals.cc +++ b/lib/globals.cc @@ 
-1,5 +1,6 @@ #include "globals.h" #include +#include double getCurrentTime(void) { @@ -7,5 +8,4 @@ double getCurrentTime(void) gettimeofday(&tv, NULL); return double(tv.tv_sec) + tv.tv_usec/1000000.0; -} - +} \ No newline at end of file diff --git a/lib/image.cc b/lib/image.cc new file mode 100644 index 00000000..e2d213d9 --- /dev/null +++ b/lib/image.cc @@ -0,0 +1,40 @@ +#include "globals.h" +#include "image.h" +#include "fmt/format.h" +#include +#include +#include + +void writeSectorsToFile(const std::vector>& sectors, const std::string& filename) +{ + /* Count the tracks, sides and sectors. */ + + int trackCount = 0; + int sideCount = 0; + int sectorCount = 0; + size_t sectorSize = 0; + for (auto& sector : sectors) + { + trackCount = std::max(sector->track()+1, trackCount); + sideCount = std::max(sector->side()+1, sideCount); + sectorCount = std::max(sector->sector()+1, sectorCount); + sectorSize = std::max(sector->data().size(), sectorSize); + } + + size_t sideSize = sectorCount * sectorSize; + size_t trackSize = sideSize * sideCount; + + std::cout << fmt::format("{} tracks, {} sides, {} sectors, {} bytes per sector, {} kB total", + trackCount, sideCount, sectorCount, sectorSize, + trackCount * sideCount * sectorCount * sectorSize / 1024); + + std::ofstream outputFile(filename, std::ios::out | std::ios::binary); + if (!outputFile.is_open()) + Error() << "cannot open output file"; + + for (auto& sector : sectors) + { + outputFile.seekp(sector->track()*trackSize + sector->side()*sideSize + sector->sector()*sectorSize, std::ios::beg); + outputFile.write((const char*) &sector->data().at(0), sector->data().size()); + } +} diff --git a/lib/image.h b/lib/image.h new file mode 100644 index 00000000..35d09ff7 --- /dev/null +++ b/lib/image.h @@ -0,0 +1,33 @@ +#ifndef IMAGE_H +#define IMAGE_H + +/* + * Note that sectors here use zero-based numbering throughout (to make the + * maths easier); traditionally floppy disks use 0-based track numbering and + * 1-based sector 
numbering, which makes no sense. + */ +class Sector +{ +public: + Sector(int track, int side, int sector, const std::vector& data): + _track(track), + _side(side), + _sector(sector), + _data(data) + {} + + int track() const { return _track; } + int side() const { return _side; } + int sector() const { return _sector; } + const std::vector& data() const { return _data; } + +private: + const int _track; + const int _side; + const int _sector; + const std::vector _data; +}; + +extern void writeSectorsToFile(const std::vector>& sectors, const std::string& filename); + +#endif diff --git a/lib/reader.cc b/lib/reader.cc index c4dc5e2b..1d491721 100644 --- a/lib/reader.cc +++ b/lib/reader.cc @@ -4,6 +4,7 @@ #include "reader.h" #include "fluxmap.h" #include "sql.h" +#include "fmt/format.h" #include static const std::regex SOURCE_REGEX("([^:]*)" @@ -40,7 +41,9 @@ Fluxmap& Track::read() { if (!_read) { + std::cout << fmt::format("{0:>3}.{1}: ", track, side) << std::flush; reallyRead(); + std::cout << fmt::format("{0} ms in {1} bytes", int(_fluxmap->duration()/1e6), _fluxmap->bytes()) << std::endl; _read = true; } return *_fluxmap.get(); @@ -53,22 +56,16 @@ void Track::forceReread() void CapturedTrack::reallyRead() { - std::cout << "read track " << track << " side " << side << ": " << std::flush; usbSeek(track); _fluxmap = usbRead(side, revolutions); - std::cout << int(_fluxmap->duration()/1e6) << "ms in " << _fluxmap->bytes() << " bytes" << std::endl; } void FileTrack::reallyRead() { - std::cout << "read track " << track << " side " << side << ": " << std::flush; - if (!db) db = sqlOpen(basefilename, SQLITE_OPEN_READONLY); _fluxmap = sqlReadFlux(db, track, side); - - std::cout << int(_fluxmap->duration()/1e6) << "ms in " << _fluxmap->bytes() << " bytes" << std::endl; } std::vector> readTracks() diff --git a/meson.build b/meson.build index d1850d9e..d8bd884e 100644 --- a/meson.build +++ b/meson.build @@ -24,23 +24,43 @@ executable('fluxclient', dependencies: [libusb, sqlite] 
) +fmtlib = shared_library('fmtlib', + [ + 'dep/fmt/format.cc', + 'dep/fmt/posix.cc' + ]) +fmtinc = include_directories('dep/fmt') + felib = shared_library('felib', [ 'lib/flags.cc', 'lib/fluxmap.cc', 'lib/globals.cc', 'lib/usb.cc', + 'lib/image.cc', ], + include_directories: [fmtinc], + link_with: [fmtlib], dependencies: [libusb] ) - -sqllib = shared_library('sqllib', ['lib/sql.cc'], link_with: [felib], dependencies: [sqlite]) -readerlib = shared_library('readerlib', ['lib/reader.cc'], link_with: [felib, sqllib]) - feinc = include_directories('lib') +sqllib = shared_library('sqllib', ['lib/sql.cc'], link_with: [felib], dependencies: [sqlite]) +readerlib = shared_library('readerlib', ['lib/reader.cc'], include_directories: [fmtinc], link_with: [felib, sqllib, fmtlib]) + +decoderlib = shared_library('decoderlib', + [ + 'lib/decoders/decoders.cc', + 'lib/decoders/mfm.cc', + 'lib/decoders/ibm.cc' + ], + include_directories: [feinc], + link_with: [felib] +) +decoderinc = include_directories('lib/decoders') + executable('fe-rpm', ['src/fe-rpm.cc'], include_directories: [feinc], link_with: [felib]) executable('fe-seek', ['src/fe-seek.cc'], include_directories: [feinc], link_with: [felib]) executable('fe-testbulktransport', ['src/fe-testbulktransport.cc'], include_directories: [feinc], link_with: [felib]) -executable('fe-readibm', ['src/fe-readibm.cc'], include_directories: [feinc], link_with: [felib, readerlib]) +executable('fe-readibm', ['src/fe-readibm.cc'], include_directories: [feinc, fmtinc, decoderinc], link_with: [felib, readerlib, decoderlib, fmtlib]) diff --git a/src/fe-readibm.cc b/src/fe-readibm.cc index fe829e4a..6c7fab28 100644 --- a/src/fe-readibm.cc +++ b/src/fe-readibm.cc @@ -2,16 +2,47 @@ #include "flags.h" #include "reader.h" #include "fluxmap.h" +#include "decoders.h" +#include "image.h" +#include + +static StringFlag outputFilename( + { "--output", "-o" }, + "The output image file to write to.", + "ibm.img"); int main(int argc, const char* argv[]) { 
Flag::parseFlags(argc, argv); + + std::vector> allSectors; for (auto& track : readTracks()) { - track->read(); - std::cout << "track " << track->track << " " << track->side << std::endl; + Fluxmap& fluxmap = track->read(); + nanoseconds_t clockPeriod = fluxmap.guessClock(); + std::cout << fmt::format(" {:.1f} us clock; ", (double)clockPeriod/1000.0) << std::flush; + + /* For MFM, the bit clock is half the detected clock. */ + auto bitmap = decodeFluxmapToBits(fluxmap, clockPeriod/2); + std::cout << fmt::format("{} bytes encoded; ", bitmap.size()/8) << std::flush; + + auto records = decodeBitsToRecordsMfm(bitmap); + std::cout << records.size() << " records." << std::endl; + + auto sectors = decodeIbmRecordsToSectors(records); + std::cout << " " << sectors.size() << " sectors; "; + + int size = 0; + for (auto& sector : sectors) + { + size += sector->data().size(); + allSectors.push_back(std::move(sector)); + } + std::cout << size << " bytes decoded." << std::endl; } + + writeSectorsToFile(allSectors, outputFilename); return 0; }