Compare commits

...

10 Commits

Author SHA1 Message Date
David Given
ce4cabde96 Rework the kmedian code to do all arithmetic in ticks rather than us. This
apparently improves things --- precision related, maybe?
2020-07-04 14:35:04 +01:00
David Given
ffbb6609c8 Merge from trunk. 2020-07-04 12:17:45 +01:00
David Given
5b66f803f3 First stage of the massive refactor to rework things to use the k-median
algorithm.
2020-06-27 20:17:33 +02:00
David Given
33d14a5fbe Bash the decoders into compiling-but-not-yet-working with the kmedian
classifier.
2020-06-27 14:46:35 +01:00
David Given
5d27b6d9aa Use the k-median code in the inspector for actual reals. 2020-06-27 11:55:53 +02:00
David Given
0d90dcee0f Replace the kmedian code with the weighted version --- vastly faster (a factor
of ten).
2020-06-27 11:25:03 +02:00
David Given
c89f5b9649 Add a test case for the kmedian code. 2020-06-26 20:53:55 +02:00
David Given
c5bc9a8cee Merge from trunk. 2020-06-26 16:00:43 +02:00
David Given
347e4d59a3 Fix the incredibly stupid bug when determining the median. 2020-06-26 15:24:04 +02:00
David Given
6e56cc3b0a First attempt at the optimised k-median code from kristomu@github. 2020-06-26 00:15:32 +02:00
40 changed files with 590 additions and 197 deletions

View File

@@ -13,6 +13,9 @@ class AesLanierDecoder : public AbstractDecoder
public:
virtual ~AesLanierDecoder() {}
int getDecoderBands() const { return 4; }
bool isInterleaved() const { return true; }
RecordType advanceToNextRecord();
void decodeSectorRecord();
};

View File

@@ -26,8 +26,8 @@ static Bytes reverse_bits(const Bytes& input)
AbstractDecoder::RecordType AesLanierDecoder::advanceToNextRecord()
{
_sector->clock = _fmr->seekToPattern(SECTOR_PATTERN);
if (_fmr->eof() || !_sector->clock)
nanoseconds_t clock = _fmr->seekToPattern(SECTOR_PATTERN);
if (_fmr->eof() || !clock)
return UNKNOWN_RECORD;
return SECTOR_RECORD;
}

View File

@@ -13,7 +13,7 @@ class Sector;
class Fluxmap;
class SectorSet;
class AmigaDecoder : public AbstractDecoder
class AmigaDecoder : public AbstractMfmDecoder
{
public:
virtual ~AmigaDecoder() {}

View File

@@ -23,8 +23,8 @@ static const FluxPattern SECTOR_PATTERN(48, AMIGA_SECTOR_RECORD);
AbstractDecoder::RecordType AmigaDecoder::advanceToNextRecord()
{
_sector->clock = _fmr->seekToPattern(SECTOR_PATTERN);
if (_fmr->eof() || !_sector->clock)
_fmr->seekToPattern(SECTOR_PATTERN);
if (_fmr->eof())
return UNKNOWN_RECORD;
return SECTOR_RECORD;
}

View File

@@ -10,7 +10,7 @@
class Sector;
class Fluxmap;
class Apple2Decoder : public AbstractDecoder
class Apple2Decoder : public AbstractGcrDecoder
{
public:
virtual ~Apple2Decoder() {}

View File

@@ -68,7 +68,7 @@ uint8_t combine(uint16_t word)
AbstractDecoder::RecordType Apple2Decoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return RecordType::SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -21,6 +21,9 @@ class BrotherDecoder : public AbstractDecoder
public:
virtual ~BrotherDecoder() {}
int getDecoderBands() const { return 2; }
bool isInterleaved() const { return false; }
RecordType advanceToNextRecord();
void decodeSectorRecord();
void decodeDataRecord();

View File

@@ -57,7 +57,7 @@ static int decode_header_gcr(uint16_t word)
AbstractDecoder::RecordType BrotherDecoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return RecordType::SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -8,7 +8,7 @@
class Sector;
class Fluxmap;
class Commodore64Decoder : public AbstractDecoder
class Commodore64Decoder : public AbstractGcrDecoder
{
public:
virtual ~Commodore64Decoder() {}

View File

@@ -55,7 +55,7 @@ static Bytes decode(const std::vector<bool>& bits)
AbstractDecoder::RecordType Commodore64Decoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return RecordType::SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -55,7 +55,7 @@ static Bytes decode(const std::vector<bool>& bits)
AbstractDecoder::RecordType DurangoF85Decoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return RecordType::SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -8,7 +8,7 @@
class Sector;
class Fluxmap;
class DurangoF85Decoder : public AbstractDecoder
class DurangoF85Decoder : public AbstractGcrDecoder
{
public:
virtual ~DurangoF85Decoder() {}

View File

@@ -102,7 +102,7 @@ static uint16_t checksum(const Bytes& bytes)
AbstractDecoder::RecordType Fb100Decoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(SECTOR_ID_PATTERN, matcher);
_fmr->seekToPattern(SECTOR_ID_PATTERN, matcher);
if (matcher == &SECTOR_ID_PATTERN)
return RecordType::SECTOR_RECORD;
return RecordType::UNKNOWN_RECORD;
@@ -132,4 +132,4 @@ void Fb100Decoder::decodeSectorRecord()
_sector->data.writer().append(id.slice(5, 12)).append(payload);
_sector->status = (wantPayloadCrc == gotPayloadCrc) ? Sector::OK : Sector::BAD_CHECKSUM;
}
}

View File

@@ -9,7 +9,7 @@ class Sector;
class Fluxmap;
class Track;
class Fb100Decoder : public AbstractDecoder
class Fb100Decoder : public AbstractFmDecoder
{
public:
virtual ~Fb100Decoder() {}

View File

@@ -92,7 +92,7 @@ const FluxMatchers ANY_RECORD_PATTERN(
AbstractDecoder::RecordType IbmDecoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
/* If this is the MFM prefix byte, the the decoder is going to expect three
* extra bytes on the front of the header. */

View File

@@ -29,7 +29,7 @@ struct IbmIdam
uint8_t crc[2];
};
class IbmDecoder : public AbstractDecoder
class IbmDecoder : public AbstractMfmDecoder
{
public:
IbmDecoder(unsigned sectorBase, bool ignoreSideByte=false,

View File

@@ -127,7 +127,7 @@ uint8_t decode_side(uint8_t side)
AbstractDecoder::RecordType MacintoshDecoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -10,7 +10,7 @@
class Sector;
class Fluxmap;
class MacintoshDecoder : public AbstractDecoder
class MacintoshDecoder : public AbstractGcrDecoder
{
public:
virtual ~MacintoshDecoder() {}

View File

@@ -35,7 +35,7 @@ AbstractDecoder::RecordType MxDecoder::advanceToNextRecord()
{
/* First sector in the track: look for the sync marker. */
const FluxMatcher* matcher = nullptr;
_sector->clock = _clock = _fmr->seekToPattern(ID_PATTERN, matcher);
_fmr->seekToPattern(ID_PATTERN, matcher);
readRawBits(32); /* skip the ID mark */
_logicalTrack = decodeFmMfm(readRawBits(32)).slice(0, 32).reader().read_be16();
}
@@ -44,13 +44,6 @@ AbstractDecoder::RecordType MxDecoder::advanceToNextRecord()
/* That was the last sector on the disk. */
return UNKNOWN_RECORD;
}
else
{
/* Otherwise we assume the clock from the first sector is still valid.
* The decoder framwork will automatically stop when we hit the end of
* the track. */
_sector->clock = _clock;
}
_currentSector++;
return SECTOR_RECORD;

View File

@@ -3,7 +3,7 @@
#include "decoders/decoders.h"
class MxDecoder : public AbstractDecoder
class MxDecoder : public AbstractFmDecoder
{
public:
virtual ~MxDecoder() {}

View File

@@ -57,7 +57,7 @@ static Bytes decode(const std::vector<bool>& bits)
AbstractDecoder::RecordType Victor9kDecoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -9,7 +9,7 @@
class Sector;
class Fluxmap;
class Victor9kDecoder : public AbstractDecoder
class Victor9kDecoder : public AbstractGcrDecoder
{
public:
virtual ~Victor9kDecoder() {}

View File

@@ -18,7 +18,7 @@ AbstractDecoder::RecordType ZilogMczDecoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_fmr->seekToIndexMark();
_sector->clock = _fmr->seekToPattern(SECTOR_START_PATTERN, matcher);
_fmr->seekToPattern(SECTOR_START_PATTERN, matcher);
if (matcher == &SECTOR_START_PATTERN)
return SECTOR_RECORD;
return UNKNOWN_RECORD;

View File

@@ -4,7 +4,7 @@
class Sector;
class Fluxmap;
class ZilogMczDecoder : public AbstractDecoder
class ZilogMczDecoder : public AbstractGcrDecoder
{
public:
virtual ~ZilogMczDecoder() {}

View File

@@ -16,22 +16,23 @@ void AbstractDecoder::decodeToSectors(Track& track)
Sector sector;
sector.physicalSide = track.physicalSide;
sector.physicalTrack = track.physicalTrack;
FluxmapReader fmr(*track.fluxmap);
FluxmapReader fmr(*track.fluxmap, getDecoderBands(), isInterleaved());
_track = &track;
_sector = &sector;
_fmr = &fmr;
_track->intervals = fmr.intervals();
beginTrack();
for (;;)
{
Fluxmap::Position recordStart = fmr.tell();
sector.clock = 0;
sector.intervals = fmr.intervals();
sector.status = Sector::MISSING;
sector.data.clear();
sector.logicalSector = sector.logicalSide = sector.logicalTrack = 0;
RecordType r = advanceToNextRecord();
if (fmr.eof() || !sector.clock)
if (fmr.eof() || sector.intervals.empty())
return;
if ((r == UNKNOWN_RECORD) || (r == DATA_RECORD))
{
@@ -80,11 +81,11 @@ void AbstractDecoder::pushRecord(const Fluxmap::Position& start, const Fluxmap::
RawRecord record;
record.physicalSide = _track->physicalSide;
record.physicalTrack = _track->physicalTrack;
record.clock = _sector->clock;
record.intervals = _sector->intervals;
record.position = start;
_fmr->seek(start);
record.data = toBytes(_fmr->readRawBits(end, _sector->clock));
record.data = toBytes(_fmr->readRawBits(end));
_track->rawrecords.push_back(record);
_fmr->seek(here);
}

View File

@@ -44,7 +44,7 @@ public:
void pushRecord(const Fluxmap::Position& start, const Fluxmap::Position& end);
std::vector<bool> readRawBits(unsigned count)
{ return _fmr->readRawBits(count, _sector->clock); }
{ return _fmr->readRawBits(count); }
Fluxmap::Position tell()
{ return _fmr->tell(); }
@@ -58,6 +58,9 @@ public:
virtual std::set<unsigned> requiredSectors(Track& track) const;
protected:
virtual int getDecoderBands() const = 0;
virtual bool isInterleaved() const = 0;
virtual void beginTrack() {};
virtual RecordType advanceToNextRecord() = 0;
virtual void decodeSectorRecord() = 0;
@@ -68,4 +71,25 @@ protected:
Sector* _sector;
};
class AbstractFmDecoder : public AbstractDecoder
{
protected:
int getDecoderBands() const { return 2; }
bool isInterleaved() const { return false; }
};
class AbstractMfmDecoder : public AbstractDecoder
{
protected:
int getDecoderBands() const { return 3; }
bool isInterleaved() const { return true; }
};
class AbstractGcrDecoder : public AbstractDecoder
{
protected:
int getDecoderBands() const { return 3; }
bool isInterleaved() const { return false; }
};
#endif

View File

@@ -3,6 +3,7 @@
#include "decoders/fluxmapreader.h"
#include "flags.h"
#include "protocol.h"
#include "kmedian.h"
#include "fmt/format.h"
#include <numeric>
#include <math.h>
@@ -30,6 +31,26 @@ static DoubleFlag minimumClockUs(
"Refuse to detect clocks shorter than this, to avoid false positives.",
0.75);
FluxmapReader::FluxmapReader(const Fluxmap& fluxmap):
_fluxmap(fluxmap),
_bytes(fluxmap.ptr()),
_size(fluxmap.bytes()),
_isInterleaved(false)
{
rewind();
}
FluxmapReader::FluxmapReader(const Fluxmap& fluxmap, int bands, bool isInterleaved):
_fluxmap(fluxmap),
_bytes(fluxmap.ptr()),
_size(fluxmap.bytes()),
_isInterleaved(isInterleaved)
{
for (unsigned ticks : optimalKMedian(fluxmap, bands))
_intervals.push_back(ticks * NS_PER_TICK);
rewind();
}
uint8_t FluxmapReader::getNextEvent(unsigned& ticks)
{
ticks = 0;
@@ -65,8 +86,9 @@ unsigned FluxmapReader::findEvent(uint8_t target)
}
}
unsigned FluxmapReader::readInterval(nanoseconds_t clock)
unsigned FluxmapReader::readInterval()
{
nanoseconds_t clock = _intervals.front();
unsigned thresholdTicks = (clock * pulseDebounceThreshold) / NS_PER_TICK;
unsigned ticks = 0;
@@ -94,7 +116,8 @@ static int findLowestSetBit(uint64_t value)
}
FluxPattern::FluxPattern(unsigned bits, uint64_t pattern):
_bits(bits)
_bits(bits),
_pattern(pattern)
{
const uint64_t TOPBIT = 1ULL << 63;
@@ -251,9 +274,9 @@ void FluxmapReader::seekToIndexMark()
_pos.zeroes = 0;
}
bool FluxmapReader::readRawBit(nanoseconds_t clockPeriod)
bool FluxmapReader::readRawBit()
{
assert(clockPeriod != 0);
assert(!_intervals.empty());
if (_pos.zeroes)
{
@@ -261,30 +284,38 @@ bool FluxmapReader::readRawBit(nanoseconds_t clockPeriod)
return false;
}
nanoseconds_t interval = readInterval(clockPeriod)*NS_PER_TICK;
double clocks = (double)interval / clockPeriod + clockIntervalBias;
nanoseconds_t interval = readInterval()*NS_PER_TICK;
int clocks = 0;
while (clocks < _intervals.size()-1)
{
float median = (_intervals[clocks] + _intervals[clocks+1])/2.0;
if (interval < median)
break;
clocks++;
}
if (clocks < 1.0)
clocks = 1.0;
_pos.zeroes = (int)round(clocks) - 1;
if (_isInterleaved)
clocks++;
_pos.zeroes = clocks;
return true;
}
std::vector<bool> FluxmapReader::readRawBits(unsigned count, nanoseconds_t clockPeriod)
std::vector<bool> FluxmapReader::readRawBits(unsigned count)
{
std::vector<bool> result;
while (!eof() && count--)
{
bool b = readRawBit(clockPeriod);
bool b = readRawBit();
result.push_back(b);
}
return result;
}
std::vector<bool> FluxmapReader::readRawBits(const Fluxmap::Position& until, nanoseconds_t clockPeriod)
std::vector<bool> FluxmapReader::readRawBits(const Fluxmap::Position& until)
{
std::vector<bool> result;
while (!eof() && (_pos.bytes < until.bytes))
result.push_back(readRawBit(clockPeriod));
result.push_back(readRawBit());
return result;
}

View File

@@ -41,6 +41,7 @@ private:
std::vector<unsigned> _intervals;
unsigned _length;
unsigned _bits;
uint64_t _pattern;
unsigned _highzeroes;
bool _lowzero = false;
@@ -67,14 +68,8 @@ private:
class FluxmapReader
{
public:
FluxmapReader(const Fluxmap& fluxmap):
_fluxmap(fluxmap),
_bytes(fluxmap.ptr()),
_size(fluxmap.bytes())
{
rewind();
}
FluxmapReader(const Fluxmap& fluxmap);
FluxmapReader(const Fluxmap& fluxmap, int bands, bool isInterleaved);
FluxmapReader(const Fluxmap&& fluxmap) = delete;
void rewind()
@@ -98,7 +93,7 @@ public:
uint8_t getNextEvent(unsigned& ticks);
unsigned findEvent(uint8_t bits);
unsigned readInterval(nanoseconds_t clock); /* with debounce support */
unsigned readInterval(); /* with debounce support */
/* Important! You can only reliably seek to 1 bits. */
void seek(nanoseconds_t ns);
@@ -107,15 +102,19 @@ public:
nanoseconds_t seekToPattern(const FluxMatcher& pattern);
nanoseconds_t seekToPattern(const FluxMatcher& pattern, const FluxMatcher*& matching);
bool readRawBit(nanoseconds_t clockPeriod);
std::vector<bool> readRawBits(unsigned count, nanoseconds_t clockPeriod);
std::vector<bool> readRawBits(const Fluxmap::Position& until, nanoseconds_t clockPeriod);
bool readRawBit();
std::vector<bool> readRawBits(unsigned count);
std::vector<bool> readRawBits(const Fluxmap::Position& until);
const std::vector<nanoseconds_t> intervals() const { return _intervals; };
private:
const Fluxmap& _fluxmap;
const uint8_t* _bytes;
const size_t _size;
Fluxmap::Position _pos;
std::vector<nanoseconds_t> _intervals;
const bool _isInterleaved;
};
#endif

View File

@@ -75,7 +75,6 @@ void ImageWriter::writeCsv(const std::string& filename)
"\"Logical track\","
"\"Logical side\","
"\"Logical sector\","
"\"Clock (ns)\","
"\"Header start (ns)\","
"\"Header end (ns)\","
"\"Data start (ns)\","
@@ -96,11 +95,10 @@ void ImageWriter::writeCsv(const std::string& filename)
if (!sector)
f << fmt::format(",,{},,,,,,,,MISSING\n", sectorId);
else
f << fmt::format("{},{},{},{},{},{},{},{},{},{},{}\n",
f << fmt::format("{},{},{},{},{},{},{},{},{},{}\n",
sector->logicalTrack,
sector->logicalSide,
sector->logicalSector,
sector->clock,
sector->headerStartTime,
sector->headerEndTime,
sector->dataStartTime,

322
lib/kmedian.cc Normal file
View File

@@ -0,0 +1,322 @@
#include "globals.h"
#include "fluxmap.h"
#include "decoders/fluxmapreader.h"
#include "protocol.h"
#include "kmedian.h"
#include <algorithm>
#include <numeric>
#include <math.h>
#include <limits.h>
/* Implement a O(kn log n) method for optimal 1D K-median. The K-median problem
* consists of finding k clusters so that the sum of absolute distances from
* each of the n points to the closest cluster is minimized.
*
* GRØNLUND, Allan, et al. Fast exact k-means, k-medians and Bregman divergence
* clustering in 1d. arXiv preprint arXiv:1701.07204, 2017.
*
* The points all have one of a very small range of values (compared to the
* number of points). Since k-medians clustering is either a point or the mean
* of two points, we can then do calculations with n being the number of
* distinct points, rather than the number of points, so that O(kn log n)
* becomes much quicker in concrete terms.
*
* Finding the median takes O(log n) time instead of O(1) time, which increases
* the constant factor in front of the kn log n term.
*/
class KCluster
{
public:
struct CDF
{
int pointsSoFar;
KValue sumToHere;
KValue thisValue;
};
int uniquePoints; /* number of unique points */
std::vector<struct CDF> cdf; /* cumulative sum of points */
KCluster(const std::vector<KValue>& inputPoints)
{
std::vector<KValue> points = inputPoints;
std::sort(points.begin(), points.end());
KValue current = NAN;
int count = 0;
KValue sum = 0.0;
for (KValue point : points)
{
if (point != current)
{
struct CDF empty = {0};
cdf.push_back(empty);
}
sum += point;
count++;
auto& thiscdf = cdf.back();
thiscdf.pointsSoFar = count;
thiscdf.sumToHere = sum;
thiscdf.thisValue = point;
current = point;
}
uniquePoints = cdf.size();
}
/* Determines the median point of all points between the ith category
* in cdf (inclusive), and the jth (exclusive). It's used to
* reconstruct clusters later.
*/
KValue medianPoint(int i, int j)
{
if (i >= j)
return 0;
int lowerCount = 0;
if (i > 0)
lowerCount = cdf[i-1].pointsSoFar;
int upperCount = cdf[j-1].pointsSoFar;
/* Note, this is not the index, but number of points start inclusive,
* which is why the -1 has to be turned into +1. */
KValue medianPoint = (lowerCount + upperCount + 1) / 2.0;
auto lowerMedian = std::lower_bound(cdf.begin(), cdf.end(), (int)medianPoint,
[&](const struct CDF& lhs, int rhs) {
return lhs.pointsSoFar < rhs;
}
);
if (((int)ceil(medianPoint) == (int)floor(medianPoint))
|| ((int)floor(medianPoint) != lowerMedian->pointsSoFar))
return lowerMedian->thisValue;
else
return (lowerMedian->thisValue + (lowerMedian+1)->thisValue) / 2.0;
}
/* Find the (location of the) minimum value of each row of an nxn matrix in
* n log n time, given that the matrix is monotone (by the definition of
* Grønlund et al.) and defined by a function M that takes row and column
* as parameters. All boundary values are closed, [minRow, maxRow]
* p. 197 of
* AGGARWAL, Alok, et al. Geometric applications of a matrix-searching
* algorithm. Algorithmica, 1987, 2.1-4: 195-208.
*/
void monotoneMatrixIndices(std::function<KValue(int, int)>& M,
int minRow, int maxRow, int minCol, int maxCol,
std::vector<int>& Tout,
std::vector<KValue>& Dout)
{
if (maxRow == minRow)
return;
int currentRow = minRow + (maxRow-minRow)/2;
/* Find the minimum column for this row. */
KValue minValue = INT_MAX;
int minHere = minCol;
for (int i = minCol; i<=maxCol; i++)
{
KValue v = M(currentRow, i);
if (v < minValue)
{
minValue = v;
minHere = i;
}
}
if (Tout[currentRow])
throw std::runtime_error("tried to set a variable already set");
Tout[currentRow] = minHere;
Dout[currentRow] = M(currentRow, minHere);
/* No lower row can have a minimum to the right of the current minimum.
* Recurse on that assumption. */
monotoneMatrixIndices(M, minRow, currentRow, minCol, minHere+1, Tout, Dout);
/* And no higher row can have a minimum to the left. */
monotoneMatrixIndices(M, currentRow+1, maxRow, minHere, maxCol, Tout, Dout);
}
/* If item is the first cdf value with count at
* or above i, returns the cumulative sum up to the ith entry of the
* underlying sorted points list.
*/
KValue cumulativeAt(std::vector<struct CDF>::iterator item, int i)
{
KValue sumBelow = 0.0;
int numPointsBelow = 0;
if (item != cdf.begin())
{
sumBelow = (item-1)->sumToHere;
numPointsBelow = (item-1)->pointsSoFar;
}
return sumBelow + item->thisValue*(i - numPointsBelow);
}
/* Grønlund et al.'s CC function for the K-median problem.
*
* CC(i,j) is the cost of grouping points_i...points_j into one cluster
* with the optimal cluster point (the median point). By programming
* convention, the interval is half-open and indexed from 0, unlike the
* paper's convention of closed intervals indexed from 1.
*
* Note: i and j are indices onto weighted_cdf. So e.g. if i = 0, j = 2 and
* weighted cdf is [[2, 0, 0], [4, 2, 1], [5, 2, 2]], then that is the cost
* of clustering all points between the one described by the zeroth weighted
* cdf entry, and up to (but not including) the last. In other words, it's
* CC([0, 0, 1, 1], 0, 5).
*/
KValue CC(int i, int j)
{
if (i >= j)
return 0;
int lowerCount = 0;
if (i > 0)
lowerCount = cdf[i-1].pointsSoFar;
int upperCount = cdf[j-1].pointsSoFar;
/* Note, this is not the index, but number of points start inclusive,
* which is why the -1 has to be turned into +1. */
KValue medianPoint = (lowerCount + upperCount + 1) / 2.0;
auto lowerMedian = std::lower_bound(cdf.begin(), cdf.end(), (int)medianPoint,
[&](const struct CDF& lhs, int rhs) {
return lhs.pointsSoFar < rhs;
}
);
KValue mu;
if (((int)ceil(medianPoint) == (int)floor(medianPoint))
|| ((int)floor(medianPoint) != lowerMedian->pointsSoFar))
mu = lowerMedian->thisValue;
else
mu = (lowerMedian->thisValue + (lowerMedian+1)->thisValue) / 2.0;
/* Lower part: entry i to median point between i and j, median excluded. */
int sumBelow = cumulativeAt(lowerMedian, (int)medianPoint);
if (i > 0)
sumBelow -= cdf[i-1].sumToHere;
/* Upper part: everything from the median up, including the median if it's a
* real point. */
int sumAbove = cdf[j-1].sumToHere - cumulativeAt(lowerMedian, (int)medianPoint);
return floor(medianPoint - lowerCount)*mu - sumBelow + sumAbove - ceil(upperCount - medianPoint)*mu;
}
/* D_previous is the D vector for (i-1) clusters, or empty if i < 2.
* It's possible to do this even faster (and more incomprehensibly).
* See Grønlund et al. for that. */
/* Calculate C_i[m][j] given D_previous = D[i-1]. p. 4 */
KValue C_i(int i, const std::vector<KValue>& D_previous, int m, int j)
{
KValue f;
if (i == 1)
f = CC(0, m);
else
f = D_previous[std::min(j, m)] + CC(j, m);
return f;
}
/* Calculates the optimal cluster centres for the points. */
std::vector<KValue> optimalKMedian(int numClusters)
{
std::vector<std::vector<int>> T(numClusters+1, std::vector<int>(uniquePoints+1, 0));
std::vector<std::vector<KValue>> D(numClusters+1, std::vector<KValue>(uniquePoints+1, 0.0));
for (int i=1; i<numClusters+1; i++)
{
/* Stop if we achieve optimal clustering with fewer clusters than the
* user asked for. */
std::vector<KValue>& lastD = D[i-1];
if ((i != 1) && (lastD.back() == 0.0))
continue;
std::function<KValue(int, int)> M = [&](int m, int j) {
return C_i(i, lastD, m, j);
};
monotoneMatrixIndices(M, i, uniquePoints+1, i-1, uniquePoints+1,
T[i], D[i]);
}
/* Backtrack. The last cluster has to encompass everything, so the
* rightmost boundary of the last cluster is the last point in the
* array, hence given by the last position of T. Then the previous
* cluster transition boundary is given by where that cluster starts,
* and so on.
*/
int currentClusteringRange = uniquePoints;
std::vector<KValue> centres;
for (int i=numClusters; i>0; i--)
{
int newClusteringRange = T[i][currentClusteringRange];
/* Reconstruct the cluster that's the median point between
* new_cluster_range and cur_clustering_range. */
centres.push_back(medianPoint(newClusteringRange, currentClusteringRange));
currentClusteringRange = newClusteringRange;
}
std::sort(centres.begin(), centres.end());
return centres;
}
};
static std::vector<KValue> getPointsFromFluxmap(const Fluxmap& fluxmap)
{
FluxmapReader fmr(fluxmap);
std::vector<KValue> points;
while (!fmr.eof())
{
KValue point = fmr.findEvent(F_BIT_PULSE);
points.push_back(point);
}
return points;
}
/* Analyses the fluxmap and determines the optimal cluster centres for it. The
* number of clusters is taken from the size of the output array. */
std::vector<KValue> optimalKMedian(const std::vector<KValue>& points, int clusters)
{
KCluster kcluster(points);
return kcluster.optimalKMedian(clusters);
}
std::vector<unsigned> optimalKMedian(const Fluxmap& fluxmap, int clusters)
{
std::vector<unsigned> ticks;
for (KValue t : optimalKMedian(getPointsFromFluxmap(fluxmap), clusters))
ticks.push_back(t);
return ticks;
}

10
lib/kmedian.h Normal file
View File

@@ -0,0 +1,10 @@
#ifndef KMEDIAN_H
#define KMEDIAN_H
typedef float KValue;
extern std::vector<KValue> optimalKMedian(const std::vector<KValue>& points, int clusters);
extern std::vector<unsigned> optimalKMedian(const Fluxmap& fluxmap, int clusters);
#endif

View File

@@ -195,10 +195,26 @@ void readDiskCommand(AbstractDecoder& decoder)
std::cout << fmt::format("{} records, {} sectors; ",
track->rawrecords.size(),
track->sectors.size());
if (track->sectors.size() > 0)
std::cout << fmt::format("{:.2f}us clock ({:.0f}kHz); ",
track->sectors.begin()->clock / 1000.0,
1000000.0 / track->sectors.begin()->clock);
const std::vector<nanoseconds_t>& intervals = track->intervals;
bool first = true;
for (nanoseconds_t i : intervals)
{
if (!first)
std::cout << "/";
first = false;
std::cout << fmt::format("{:.2f}", i / 1000.0);
}
std::cout << " us or ";
first = true;
for (nanoseconds_t i : intervals)
{
if (!first)
std::cout << "/";
first = false;
std::cout << fmt::format("{:.0f}", 1000000.0 / i);
}
std::cout << fmt::format(" kHz interval\n");
for (auto& sector : track->sectors)
{
@@ -251,8 +267,7 @@ void readDiskCommand(AbstractDecoder& decoder)
std::cout << "\nRaw (undecoded) records follow:\n\n";
for (auto& record : track->rawrecords)
{
std::cout << fmt::format("I+{:.2f}us with {:.2f}us clock\n",
record.position.ns() / 1000.0, record.clock / 1000.0);
std::cout << fmt::format("I+{:.2f}us\n", record.position.ns() / 1000.0);
hexdump(std::cout, record.data);
std::cout << std::endl;
}
@@ -263,9 +278,9 @@ void readDiskCommand(AbstractDecoder& decoder)
std::cout << "\nDecoded sectors follow:\n\n";
for (auto& sector : track->sectors)
{
std::cout << fmt::format("{}.{:02}.{:02}: I+{:.2f}us with {:.2f}us clock: status {}\n",
std::cout << fmt::format("{}.{:02}.{:02}: I+{:.2f}us: status {}\n",
sector.logicalTrack, sector.logicalSide, sector.logicalSector,
sector.position.ns() / 1000.0, sector.clock / 1000.0,
sector.position.ns() / 1000.0,
sector.status);
hexdump(std::cout, sector.data);
std::cout << std::endl;

View File

@@ -9,7 +9,7 @@ public:
RawRecord() {}
Fluxmap::Position position;
nanoseconds_t clock = 0;
std::vector<nanoseconds_t> intervals;
int physicalTrack = 0;
int physicalSide = 0;
Bytes data;

View File

@@ -26,7 +26,7 @@ public:
Status status = Status::INTERNAL_ERROR;
Fluxmap::Position position;
nanoseconds_t clock = 0;
std::vector<nanoseconds_t> intervals;
nanoseconds_t headerStartTime = 0;
nanoseconds_t headerEndTime = 0;
nanoseconds_t dataStartTime = 0;

View File

@@ -20,6 +20,7 @@ public:
unsigned physicalSide;
std::shared_ptr<FluxSource> fluxsource;
std::unique_ptr<Fluxmap> fluxmap;
std::vector<nanoseconds_t> intervals;
std::vector<RawRecord> rawrecords;
std::vector<Sector> sectors;

View File

@@ -192,6 +192,7 @@ buildlibrary libbackend.a \
lib/fluxsource/streamfluxsource.cc \
lib/globals.cc \
lib/hexdump.cc \
lib/kmedian.cc \
lib/ldbs.cc \
lib/reader.cc \
lib/sector.cc \
@@ -258,6 +259,7 @@ buildsimpleprogram brother240tool \
libemu.a \
libfmt.a \
runtest amiga-test tests/amiga.cc
runtest bitaccumulator-test tests/bitaccumulator.cc
runtest bytes-test tests/bytes.cc
runtest compression-test tests/compression.cc
@@ -265,6 +267,7 @@ runtest dataspec-test tests/dataspec.cc
runtest flags-test tests/flags.cc
runtest fluxpattern-test tests/fluxpattern.cc
runtest fmmfm-test tests/fmmfm.cc
runtest kmedian-test tests/kmedian.cc
runtest kryoflux-test tests/kryoflux.cc
runtest ldbs-test tests/ldbs.cc
runtest amiga-test tests/amiga.cc

View File

@@ -10,6 +10,7 @@
#include "sector.h"
#include "track.h"
#include "fmt/format.h"
#include "kmedian.h"
static FlagGroup flags { &readerFlags };
@@ -50,6 +51,11 @@ static DoubleFlag signalLevelFactor(
"Clock detection signal level (min + (max-min)*factor).",
0.05);
static IntFlag bands(
{ "--bands" },
"Number of bands to use for k-median interval classification.",
3);
void setDecoderManualClockRate(double clockrate_us)
{
manualClockRate.setDefaultValue(clockrate_us);
@@ -83,54 +89,6 @@ static nanoseconds_t guessClock(const Fluxmap& fluxmap)
uint32_t noise_floor = min + (max-min)*noiseFloorFactor;
uint32_t signal_level = min + (max-min)*signalLevelFactor;
/* Find a point solidly within the first pulse. */
int pulseindex = 0;
while (pulseindex < 256)
{
if (buckets[pulseindex] > signal_level)
break;
pulseindex++;
}
if (pulseindex == -1)
return 0;
/* Find the upper and lower bounds of the pulse. */
int peaklo = pulseindex;
while (peaklo > 0)
{
if (buckets[peaklo] < noise_floor)
break;
peaklo--;
}
int peakhi = pulseindex;
while (peakhi < 255)
{
if (buckets[peakhi] < noise_floor)
break;
peakhi++;
}
/* Find the total accumulated size of the pulse. */
uint32_t total_size = 0;
for (int i = peaklo; i < peakhi; i++)
total_size += buckets[i];
/* Now find the median. */
uint32_t count = 0;
int median = peaklo;
while (median < peakhi)
{
count += buckets[median];
if (count > (total_size/2))
break;
median++;
}
std::cout << "\nClock detection histogram:" << std::endl;
bool skipping = true;
@@ -166,16 +124,13 @@ static nanoseconds_t guessClock(const Fluxmap& fluxmap)
std::cout << fmt::format("Noise floor: {}", noise_floor) << std::endl;
std::cout << fmt::format("Signal level: {}", signal_level) << std::endl;
std::cout << fmt::format("Peak start: {} ({:.2f} us)", peaklo, peaklo*US_PER_TICK) << std::endl;
std::cout << fmt::format("Peak end: {} ({:.2f} us)", peakhi, peakhi*US_PER_TICK) << std::endl;
std::cout << fmt::format("Median: {} ({:.2f} us)", median, median*US_PER_TICK) << std::endl;
/*
* Okay, the median should now be a good candidate for the (or a) clock.
* How this maps onto the actual clock rate depends on the encoding.
*/
std::vector<unsigned> centres = optimalKMedian(fluxmap, bands);
for (int i=0; i<bands; i++)
std::cout << fmt::format("Band #{}: {} {:.2f} us\n",
i, centres[i], centres[i] * US_PER_TICK);
return median * NS_PER_TICK;
return centres[0] * NS_PER_TICK;
}
int mainInspect(int argc, const char* argv[])
@@ -198,7 +153,7 @@ int mainInspect(int argc, const char* argv[])
nanoseconds_t clockPeriod = guessClock(*track->fluxmap);
std::cout << fmt::format("{:.2f} us clock detected.", (double)clockPeriod/1000.0) << std::flush;
FluxmapReader fmr(*track->fluxmap);
FluxmapReader fmr(*track->fluxmap, bands, false);
fmr.seek(seekFlag*1000000.0);
if (dumpFluxFlag)
@@ -270,7 +225,7 @@ int mainInspect(int argc, const char* argv[])
{
if (fmr.eof())
break;
bool b = fmr.readRawBit(clockPeriod);
bool b = fmr.readRawBit();
std::cout << (b ? 'X' : '-');
}

43
tests/betterassert.h Normal file
View File

@@ -0,0 +1,43 @@
#ifndef BETTERASSERT_H
#define BETTERASSERT_H
#include <assert.h>
namespace std {
template <class T>
static std::string to_string(const std::vector<T>& vector)
{
std::stringstream s;
s << "vector{";
bool first = true;
for (const T& t : vector)
{
if (!first)
s << ", ";
first = false;
s << t;
}
s << "}";
return s.str();
}
}
#undef assert
#define assertEquals(got, expected) assertImpl(__FILE__, __LINE__, got, expected)
template <class T>
static void assertImpl(const char filename[], int linenumber, T got, T expected)
{
if (got != expected)
{
std::cerr << "assertion failure at "
<< filename << ":" << linenumber
<< ": got " << std::to_string(got)
<< ", expected " << std::to_string(expected)
<< std::endl;
abort();
}
}
#endif

View File

@@ -2,84 +2,49 @@
#include "flags.h"
#include "fluxmap.h"
#include "decoders/fluxmapreader.h"
#include "betterassert.h"
#include <sstream>
FlagGroup flags { &fluxmapReaderFlags };
typedef std::vector<unsigned> ivector;
namespace std {
template <class T>
static std::string to_string(const std::vector<T>& vector)
{
std::stringstream s;
s << "vector{";
bool first = true;
for (const T& t : vector)
{
if (!first)
s << ", ";
first = false;
s << t;
}
s << "}";
return s.str();
}
}
#undef assert
#define assert(got, expected) assertImpl(__FILE__, __LINE__, got, expected)
template <class T>
static void assertImpl(const char filename[], int linenumber, T got, T expected)
{
if (got != expected)
{
std::cerr << "assertion failure at "
<< filename << ":" << linenumber
<< ": got " << std::to_string(got)
<< ", expected " << std::to_string(expected)
<< std::endl;
abort();
}
}
void test_patternconstruction()
{
{
FluxPattern fp(16, 0x0003);
assert(fp._bits, 16U);
assert(fp._intervals, ivector{ 1 });
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, ivector{ 1 });
}
{
FluxPattern fp(16, 0xc000);
assert(fp._bits, 16U);
assert(fp._intervals, (ivector{ 1, 15 }));
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, (ivector{ 1, 15 }));
}
{
FluxPattern fp(16, 0x0050);
assert(fp._bits, 16U);
assert(fp._intervals, (ivector{ 2, 5 }));
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, (ivector{ 2, 5 }));
}
{
FluxPattern fp(16, 0x0070);
assert(fp._bits, 16U);
assert(fp._intervals, (ivector{ 1, 1, 5 }));
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, (ivector{ 1, 1, 5 }));
}
{
FluxPattern fp(16, 0x0070);
assert(fp._bits, 16U);
assert(fp._intervals, (ivector{ 1, 1, 5 }));
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, (ivector{ 1, 1, 5 }));
}
{
FluxPattern fp(16, 0x0110);
assert(fp._bits, 16U);
assert(fp._intervals, (ivector{ 4, 5 }));
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, (ivector{ 4, 5 }));
}
}
@@ -92,16 +57,16 @@ void test_patternmatchingwithouttrailingzeros()
const unsigned closematch2[] = { 110, 110, 220, 110 };
FluxMatch match;
assert(fp.matches(&matching[4], match), true);
assert(match.intervals, 2U);
assertEquals(fp.matches(&matching[4], match), true);
assertEquals(match.intervals, 2U);
assert(fp.matches(&notmatching[4], match), false);
assertEquals(fp.matches(&notmatching[4], match), false);
assert(fp.matches(&closematch1[4], match), true);
assert(match.intervals, 2U);
assertEquals(fp.matches(&closematch1[4], match), true);
assertEquals(match.intervals, 2U);
assert(fp.matches(&closematch2[4], match), true);
assert(match.intervals, 2U);
assertEquals(fp.matches(&closematch2[4], match), true);
assertEquals(match.intervals, 2U);
}
void test_patternmatchingwithtrailingzeros()
@@ -113,16 +78,16 @@ void test_patternmatchingwithtrailingzeros()
const unsigned closematch2[] = { 110, 110, 220, 110, 220 };
FluxMatch match;
assert(fp.matches(&matching[5], match), true);
assert(match.intervals, 3U);
assertEquals(fp.matches(&matching[5], match), true);
assertEquals(match.intervals, 3U);
assert(fp.matches(&notmatching[5], match), false);
assertEquals(fp.matches(&notmatching[5], match), false);
assert(fp.matches(&closematch1[5], match), true);
assert(match.intervals, 3U);
assertEquals(fp.matches(&closematch1[5], match), true);
assertEquals(match.intervals, 3U);
assert(fp.matches(&closematch2[5], match), true);
assert(match.intervals, 3U);
assertEquals(fp.matches(&closematch2[5], match), true);
assertEquals(match.intervals, 3U);
}
int main(int argc, const char* argv[])

27
tests/kmedian.cc Normal file
View File

@@ -0,0 +1,27 @@
#include "globals.h"
#include <string.h>
#include "bytes.h"
#include "fluxmap.h"
#include "kmedian.h"
#include "fmt/format.h"
#include "betterassert.h"
static void testOptimalKMedian()
{
std::vector<float> points = { 22, 36, 35, 22, 22, 23, 37, 22, 35, 47 };
std::vector<float> clusters = optimalKMedian(points, 3);
assertEquals(clusters,
(std::vector<float>{
22, 35.5, 47
})
);
}
int main(int argc, const char* argv[])
{
testOptimalKMedian();
return 0;
}