Compare commits

...

10 Commits
ab ... kmedian

Author SHA1 Message Date
David Given
ce4cabde96 Rework the kmedian code to do all arithmetic in ticks rather than us. This
apparently improves things --- precision related, maybe?
2020-07-04 14:35:04 +01:00
David Given
ffbb6609c8 Merge from trunk. 2020-07-04 12:17:45 +01:00
David Given
5b66f803f3 First stage of the massive refactor to rework things to use the k-median
algorithm.
2020-06-27 20:17:33 +02:00
David Given
33d14a5fbe Bash the decoders into compiling-but-not-yet-working with the kmedian
classifier.
2020-06-27 14:46:35 +01:00
David Given
5d27b6d9aa Use the k-median code in the inspector for actual reals. 2020-06-27 11:55:53 +02:00
David Given
0d90dcee0f Replace the kmedian code with the weighted version --- vastly faster (a factor
of ten).
2020-06-27 11:25:03 +02:00
David Given
c89f5b9649 Add a test case for the kmedian code. 2020-06-26 20:53:55 +02:00
David Given
c5bc9a8cee Merge from trunk. 2020-06-26 16:00:43 +02:00
David Given
347e4d59a3 Fix the incredibly stupid bug when determining the median. 2020-06-26 15:24:04 +02:00
David Given
6e56cc3b0a First attempt at the optimised k-median code from kristomu@github. 2020-06-26 00:15:32 +02:00
40 changed files with 590 additions and 197 deletions

View File

@@ -13,6 +13,9 @@ class AesLanierDecoder : public AbstractDecoder
public:
virtual ~AesLanierDecoder() {}
int getDecoderBands() const { return 4; }
bool isInterleaved() const { return true; }
RecordType advanceToNextRecord();
void decodeSectorRecord();
};

View File

@@ -26,8 +26,8 @@ static Bytes reverse_bits(const Bytes& input)
AbstractDecoder::RecordType AesLanierDecoder::advanceToNextRecord()
{
_sector->clock = _fmr->seekToPattern(SECTOR_PATTERN);
if (_fmr->eof() || !_sector->clock)
nanoseconds_t clock = _fmr->seekToPattern(SECTOR_PATTERN);
if (_fmr->eof() || !clock)
return UNKNOWN_RECORD;
return SECTOR_RECORD;
}

View File

@@ -13,7 +13,7 @@ class Sector;
class Fluxmap;
class SectorSet;
class AmigaDecoder : public AbstractDecoder
class AmigaDecoder : public AbstractMfmDecoder
{
public:
virtual ~AmigaDecoder() {}

View File

@@ -23,8 +23,8 @@ static const FluxPattern SECTOR_PATTERN(48, AMIGA_SECTOR_RECORD);
AbstractDecoder::RecordType AmigaDecoder::advanceToNextRecord()
{
_sector->clock = _fmr->seekToPattern(SECTOR_PATTERN);
if (_fmr->eof() || !_sector->clock)
_fmr->seekToPattern(SECTOR_PATTERN);
if (_fmr->eof())
return UNKNOWN_RECORD;
return SECTOR_RECORD;
}

View File

@@ -10,7 +10,7 @@
class Sector;
class Fluxmap;
class Apple2Decoder : public AbstractDecoder
class Apple2Decoder : public AbstractGcrDecoder
{
public:
virtual ~Apple2Decoder() {}

View File

@@ -68,7 +68,7 @@ uint8_t combine(uint16_t word)
AbstractDecoder::RecordType Apple2Decoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return RecordType::SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -21,6 +21,9 @@ class BrotherDecoder : public AbstractDecoder
public:
virtual ~BrotherDecoder() {}
int getDecoderBands() const { return 2; }
bool isInterleaved() const { return false; }
RecordType advanceToNextRecord();
void decodeSectorRecord();
void decodeDataRecord();

View File

@@ -57,7 +57,7 @@ static int decode_header_gcr(uint16_t word)
AbstractDecoder::RecordType BrotherDecoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return RecordType::SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -8,7 +8,7 @@
class Sector;
class Fluxmap;
class Commodore64Decoder : public AbstractDecoder
class Commodore64Decoder : public AbstractGcrDecoder
{
public:
virtual ~Commodore64Decoder() {}

View File

@@ -55,7 +55,7 @@ static Bytes decode(const std::vector<bool>& bits)
AbstractDecoder::RecordType Commodore64Decoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return RecordType::SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -55,7 +55,7 @@ static Bytes decode(const std::vector<bool>& bits)
AbstractDecoder::RecordType DurangoF85Decoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return RecordType::SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -8,7 +8,7 @@
class Sector;
class Fluxmap;
class DurangoF85Decoder : public AbstractDecoder
class DurangoF85Decoder : public AbstractGcrDecoder
{
public:
virtual ~DurangoF85Decoder() {}

View File

@@ -102,7 +102,7 @@ static uint16_t checksum(const Bytes& bytes)
AbstractDecoder::RecordType Fb100Decoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(SECTOR_ID_PATTERN, matcher);
_fmr->seekToPattern(SECTOR_ID_PATTERN, matcher);
if (matcher == &SECTOR_ID_PATTERN)
return RecordType::SECTOR_RECORD;
return RecordType::UNKNOWN_RECORD;
@@ -132,4 +132,4 @@ void Fb100Decoder::decodeSectorRecord()
_sector->data.writer().append(id.slice(5, 12)).append(payload);
_sector->status = (wantPayloadCrc == gotPayloadCrc) ? Sector::OK : Sector::BAD_CHECKSUM;
}
}

View File

@@ -9,7 +9,7 @@ class Sector;
class Fluxmap;
class Track;
class Fb100Decoder : public AbstractDecoder
class Fb100Decoder : public AbstractFmDecoder
{
public:
virtual ~Fb100Decoder() {}

View File

@@ -92,7 +92,7 @@ const FluxMatchers ANY_RECORD_PATTERN(
AbstractDecoder::RecordType IbmDecoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
/* If this is the MFM prefix byte, the the decoder is going to expect three
* extra bytes on the front of the header. */

View File

@@ -29,7 +29,7 @@ struct IbmIdam
uint8_t crc[2];
};
class IbmDecoder : public AbstractDecoder
class IbmDecoder : public AbstractMfmDecoder
{
public:
IbmDecoder(unsigned sectorBase, bool ignoreSideByte=false,

View File

@@ -127,7 +127,7 @@ uint8_t decode_side(uint8_t side)
AbstractDecoder::RecordType MacintoshDecoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -10,7 +10,7 @@
class Sector;
class Fluxmap;
class MacintoshDecoder : public AbstractDecoder
class MacintoshDecoder : public AbstractGcrDecoder
{
public:
virtual ~MacintoshDecoder() {}

View File

@@ -35,7 +35,7 @@ AbstractDecoder::RecordType MxDecoder::advanceToNextRecord()
{
/* First sector in the track: look for the sync marker. */
const FluxMatcher* matcher = nullptr;
_sector->clock = _clock = _fmr->seekToPattern(ID_PATTERN, matcher);
_fmr->seekToPattern(ID_PATTERN, matcher);
readRawBits(32); /* skip the ID mark */
_logicalTrack = decodeFmMfm(readRawBits(32)).slice(0, 32).reader().read_be16();
}
@@ -44,13 +44,6 @@ AbstractDecoder::RecordType MxDecoder::advanceToNextRecord()
/* That was the last sector on the disk. */
return UNKNOWN_RECORD;
}
else
{
/* Otherwise we assume the clock from the first sector is still valid.
* The decoder framwork will automatically stop when we hit the end of
* the track. */
_sector->clock = _clock;
}
_currentSector++;
return SECTOR_RECORD;

View File

@@ -3,7 +3,7 @@
#include "decoders/decoders.h"
class MxDecoder : public AbstractDecoder
class MxDecoder : public AbstractFmDecoder
{
public:
virtual ~MxDecoder() {}

View File

@@ -57,7 +57,7 @@ static Bytes decode(const std::vector<bool>& bits)
AbstractDecoder::RecordType Victor9kDecoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_sector->clock = _fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
_fmr->seekToPattern(ANY_RECORD_PATTERN, matcher);
if (matcher == &SECTOR_RECORD_PATTERN)
return SECTOR_RECORD;
if (matcher == &DATA_RECORD_PATTERN)

View File

@@ -9,7 +9,7 @@
class Sector;
class Fluxmap;
class Victor9kDecoder : public AbstractDecoder
class Victor9kDecoder : public AbstractGcrDecoder
{
public:
virtual ~Victor9kDecoder() {}

View File

@@ -18,7 +18,7 @@ AbstractDecoder::RecordType ZilogMczDecoder::advanceToNextRecord()
{
const FluxMatcher* matcher = nullptr;
_fmr->seekToIndexMark();
_sector->clock = _fmr->seekToPattern(SECTOR_START_PATTERN, matcher);
_fmr->seekToPattern(SECTOR_START_PATTERN, matcher);
if (matcher == &SECTOR_START_PATTERN)
return SECTOR_RECORD;
return UNKNOWN_RECORD;

View File

@@ -4,7 +4,7 @@
class Sector;
class Fluxmap;
class ZilogMczDecoder : public AbstractDecoder
class ZilogMczDecoder : public AbstractGcrDecoder
{
public:
virtual ~ZilogMczDecoder() {}

View File

@@ -16,22 +16,23 @@ void AbstractDecoder::decodeToSectors(Track& track)
Sector sector;
sector.physicalSide = track.physicalSide;
sector.physicalTrack = track.physicalTrack;
FluxmapReader fmr(*track.fluxmap);
FluxmapReader fmr(*track.fluxmap, getDecoderBands(), isInterleaved());
_track = &track;
_sector = &sector;
_fmr = &fmr;
_track->intervals = fmr.intervals();
beginTrack();
for (;;)
{
Fluxmap::Position recordStart = fmr.tell();
sector.clock = 0;
sector.intervals = fmr.intervals();
sector.status = Sector::MISSING;
sector.data.clear();
sector.logicalSector = sector.logicalSide = sector.logicalTrack = 0;
RecordType r = advanceToNextRecord();
if (fmr.eof() || !sector.clock)
if (fmr.eof() || sector.intervals.empty())
return;
if ((r == UNKNOWN_RECORD) || (r == DATA_RECORD))
{
@@ -80,11 +81,11 @@ void AbstractDecoder::pushRecord(const Fluxmap::Position& start, const Fluxmap::
RawRecord record;
record.physicalSide = _track->physicalSide;
record.physicalTrack = _track->physicalTrack;
record.clock = _sector->clock;
record.intervals = _sector->intervals;
record.position = start;
_fmr->seek(start);
record.data = toBytes(_fmr->readRawBits(end, _sector->clock));
record.data = toBytes(_fmr->readRawBits(end));
_track->rawrecords.push_back(record);
_fmr->seek(here);
}

View File

@@ -44,7 +44,7 @@ public:
void pushRecord(const Fluxmap::Position& start, const Fluxmap::Position& end);
std::vector<bool> readRawBits(unsigned count)
{ return _fmr->readRawBits(count, _sector->clock); }
{ return _fmr->readRawBits(count); }
Fluxmap::Position tell()
{ return _fmr->tell(); }
@@ -58,6 +58,9 @@ public:
virtual std::set<unsigned> requiredSectors(Track& track) const;
protected:
virtual int getDecoderBands() const = 0;
virtual bool isInterleaved() const = 0;
virtual void beginTrack() {};
virtual RecordType advanceToNextRecord() = 0;
virtual void decodeSectorRecord() = 0;
@@ -68,4 +71,25 @@ protected:
Sector* _sector;
};
class AbstractFmDecoder : public AbstractDecoder
{
protected:
int getDecoderBands() const { return 2; }
bool isInterleaved() const { return false; }
};
class AbstractMfmDecoder : public AbstractDecoder
{
protected:
int getDecoderBands() const { return 3; }
bool isInterleaved() const { return true; }
};
class AbstractGcrDecoder : public AbstractDecoder
{
protected:
int getDecoderBands() const { return 3; }
bool isInterleaved() const { return false; }
};
#endif

View File

@@ -3,6 +3,7 @@
#include "decoders/fluxmapreader.h"
#include "flags.h"
#include "protocol.h"
#include "kmedian.h"
#include "fmt/format.h"
#include <numeric>
#include <math.h>
@@ -30,6 +31,26 @@ static DoubleFlag minimumClockUs(
"Refuse to detect clocks shorter than this, to avoid false positives.",
0.75);
FluxmapReader::FluxmapReader(const Fluxmap& fluxmap):
_fluxmap(fluxmap),
_bytes(fluxmap.ptr()),
_size(fluxmap.bytes()),
_isInterleaved(false)
{
rewind();
}
FluxmapReader::FluxmapReader(const Fluxmap& fluxmap, int bands, bool isInterleaved):
_fluxmap(fluxmap),
_bytes(fluxmap.ptr()),
_size(fluxmap.bytes()),
_isInterleaved(isInterleaved)
{
for (unsigned ticks : optimalKMedian(fluxmap, bands))
_intervals.push_back(ticks * NS_PER_TICK);
rewind();
}
uint8_t FluxmapReader::getNextEvent(unsigned& ticks)
{
ticks = 0;
@@ -65,8 +86,9 @@ unsigned FluxmapReader::findEvent(uint8_t target)
}
}
unsigned FluxmapReader::readInterval(nanoseconds_t clock)
unsigned FluxmapReader::readInterval()
{
nanoseconds_t clock = _intervals.front();
unsigned thresholdTicks = (clock * pulseDebounceThreshold) / NS_PER_TICK;
unsigned ticks = 0;
@@ -94,7 +116,8 @@ static int findLowestSetBit(uint64_t value)
}
FluxPattern::FluxPattern(unsigned bits, uint64_t pattern):
_bits(bits)
_bits(bits),
_pattern(pattern)
{
const uint64_t TOPBIT = 1ULL << 63;
@@ -251,9 +274,9 @@ void FluxmapReader::seekToIndexMark()
_pos.zeroes = 0;
}
bool FluxmapReader::readRawBit(nanoseconds_t clockPeriod)
bool FluxmapReader::readRawBit()
{
assert(clockPeriod != 0);
assert(!_intervals.empty());
if (_pos.zeroes)
{
@@ -261,30 +284,38 @@ bool FluxmapReader::readRawBit(nanoseconds_t clockPeriod)
return false;
}
nanoseconds_t interval = readInterval(clockPeriod)*NS_PER_TICK;
double clocks = (double)interval / clockPeriod + clockIntervalBias;
nanoseconds_t interval = readInterval()*NS_PER_TICK;
int clocks = 0;
while (clocks < _intervals.size()-1)
{
float median = (_intervals[clocks] + _intervals[clocks+1])/2.0;
if (interval < median)
break;
clocks++;
}
if (clocks < 1.0)
clocks = 1.0;
_pos.zeroes = (int)round(clocks) - 1;
if (_isInterleaved)
clocks++;
_pos.zeroes = clocks;
return true;
}
std::vector<bool> FluxmapReader::readRawBits(unsigned count, nanoseconds_t clockPeriod)
std::vector<bool> FluxmapReader::readRawBits(unsigned count)
{
std::vector<bool> result;
while (!eof() && count--)
{
bool b = readRawBit(clockPeriod);
bool b = readRawBit();
result.push_back(b);
}
return result;
}
std::vector<bool> FluxmapReader::readRawBits(const Fluxmap::Position& until, nanoseconds_t clockPeriod)
std::vector<bool> FluxmapReader::readRawBits(const Fluxmap::Position& until)
{
std::vector<bool> result;
while (!eof() && (_pos.bytes < until.bytes))
result.push_back(readRawBit(clockPeriod));
result.push_back(readRawBit());
return result;
}

View File

@@ -41,6 +41,7 @@ private:
std::vector<unsigned> _intervals;
unsigned _length;
unsigned _bits;
uint64_t _pattern;
unsigned _highzeroes;
bool _lowzero = false;
@@ -67,14 +68,8 @@ private:
class FluxmapReader
{
public:
FluxmapReader(const Fluxmap& fluxmap):
_fluxmap(fluxmap),
_bytes(fluxmap.ptr()),
_size(fluxmap.bytes())
{
rewind();
}
FluxmapReader(const Fluxmap& fluxmap);
FluxmapReader(const Fluxmap& fluxmap, int bands, bool isInterleaved);
FluxmapReader(const Fluxmap&& fluxmap) = delete;
void rewind()
@@ -98,7 +93,7 @@ public:
uint8_t getNextEvent(unsigned& ticks);
unsigned findEvent(uint8_t bits);
unsigned readInterval(nanoseconds_t clock); /* with debounce support */
unsigned readInterval(); /* with debounce support */
/* Important! You can only reliably seek to 1 bits. */
void seek(nanoseconds_t ns);
@@ -107,15 +102,19 @@ public:
nanoseconds_t seekToPattern(const FluxMatcher& pattern);
nanoseconds_t seekToPattern(const FluxMatcher& pattern, const FluxMatcher*& matching);
bool readRawBit(nanoseconds_t clockPeriod);
std::vector<bool> readRawBits(unsigned count, nanoseconds_t clockPeriod);
std::vector<bool> readRawBits(const Fluxmap::Position& until, nanoseconds_t clockPeriod);
bool readRawBit();
std::vector<bool> readRawBits(unsigned count);
std::vector<bool> readRawBits(const Fluxmap::Position& until);
const std::vector<nanoseconds_t> intervals() const { return _intervals; };
private:
const Fluxmap& _fluxmap;
const uint8_t* _bytes;
const size_t _size;
Fluxmap::Position _pos;
std::vector<nanoseconds_t> _intervals;
const bool _isInterleaved;
};
#endif

View File

@@ -75,7 +75,6 @@ void ImageWriter::writeCsv(const std::string& filename)
"\"Logical track\","
"\"Logical side\","
"\"Logical sector\","
"\"Clock (ns)\","
"\"Header start (ns)\","
"\"Header end (ns)\","
"\"Data start (ns)\","
@@ -96,11 +95,10 @@ void ImageWriter::writeCsv(const std::string& filename)
if (!sector)
f << fmt::format(",,{},,,,,,,,MISSING\n", sectorId);
else
f << fmt::format("{},{},{},{},{},{},{},{},{},{},{}\n",
f << fmt::format("{},{},{},{},{},{},{},{},{},{}\n",
sector->logicalTrack,
sector->logicalSide,
sector->logicalSector,
sector->clock,
sector->headerStartTime,
sector->headerEndTime,
sector->dataStartTime,

322
lib/kmedian.cc Normal file
View File

@@ -0,0 +1,322 @@
#include "globals.h"
#include "fluxmap.h"
#include "decoders/fluxmapreader.h"
#include "protocol.h"
#include "kmedian.h"
#include <algorithm>
#include <numeric>
#include <math.h>
#include <limits.h>
/* Implement a O(kn log n) method for optimal 1D K-median. The K-median problem
* consists of finding k clusters so that the sum of absolute distances from
* each of the n points to the closest cluster is minimized.
*
* GRØNLUND, Allan, et al. Fast exact k-means, k-medians and Bregman divergence
* clustering in 1d. arXiv preprint arXiv:1701.07204, 2017.
*
* The points all have one of a very small range of values (compared to the
* number of points). Since k-medians clustering is either a point or the mean
* of two points, we can then do calculations with n being the number of
* distinct points, rather than the number of points, so that O(kn log n)
* becomes much quicker in concrete terms.
*
* Finding the median takes O(log n) time instead of O(1) time, which increases
* the constant factor in front of the kn log n term.
*/
class KCluster
{
public:
struct CDF
{
int pointsSoFar;
KValue sumToHere;
KValue thisValue;
};
int uniquePoints; /* number of unique points */
std::vector<struct CDF> cdf; /* cumulative sum of points */
KCluster(const std::vector<KValue>& inputPoints)
{
std::vector<KValue> points = inputPoints;
std::sort(points.begin(), points.end());
KValue current = NAN;
int count = 0;
KValue sum = 0.0;
for (KValue point : points)
{
if (point != current)
{
struct CDF empty = {0};
cdf.push_back(empty);
}
sum += point;
count++;
auto& thiscdf = cdf.back();
thiscdf.pointsSoFar = count;
thiscdf.sumToHere = sum;
thiscdf.thisValue = point;
current = point;
}
uniquePoints = cdf.size();
}
/* Determines the median point of all points between the ith category
* in cdf (inclusive), and the jth (exclusive). It's used to
* reconstruct clusters later.
*/
KValue medianPoint(int i, int j)
{
if (i >= j)
return 0;
int lowerCount = 0;
if (i > 0)
lowerCount = cdf[i-1].pointsSoFar;
int upperCount = cdf[j-1].pointsSoFar;
/* Note, this is not the index, but number of points start inclusive,
* which is why the -1 has to be turned into +1. */
KValue medianPoint = (lowerCount + upperCount + 1) / 2.0;
auto lowerMedian = std::lower_bound(cdf.begin(), cdf.end(), (int)medianPoint,
[&](const struct CDF& lhs, int rhs) {
return lhs.pointsSoFar < rhs;
}
);
if (((int)ceil(medianPoint) == (int)floor(medianPoint))
|| ((int)floor(medianPoint) != lowerMedian->pointsSoFar))
return lowerMedian->thisValue;
else
return (lowerMedian->thisValue + (lowerMedian+1)->thisValue) / 2.0;
}
/* Find the (location of the) minimum value of each row of an nxn matrix in
* n log n time, given that the matrix is monotone (by the definition of
* Grønlund et al.) and defined by a function M that takes row and column
* as parameters. All boundary values are closed, [minRow, maxRow]
* p. 197 of
* AGGARWAL, Alok, et al. Geometric applications of a matrix-searching
* algorithm. Algorithmica, 1987, 2.1-4: 195-208.
*/
void monotoneMatrixIndices(std::function<KValue(int, int)>& M,
int minRow, int maxRow, int minCol, int maxCol,
std::vector<int>& Tout,
std::vector<KValue>& Dout)
{
if (maxRow == minRow)
return;
int currentRow = minRow + (maxRow-minRow)/2;
/* Find the minimum column for this row. */
KValue minValue = INT_MAX;
int minHere = minCol;
for (int i = minCol; i<=maxCol; i++)
{
KValue v = M(currentRow, i);
if (v < minValue)
{
minValue = v;
minHere = i;
}
}
if (Tout[currentRow])
throw std::runtime_error("tried to set a variable already set");
Tout[currentRow] = minHere;
Dout[currentRow] = M(currentRow, minHere);
/* No lower row can have a minimum to the right of the current minimum.
* Recurse on that assumption. */
monotoneMatrixIndices(M, minRow, currentRow, minCol, minHere+1, Tout, Dout);
/* And no higher row can have a minimum to the left. */
monotoneMatrixIndices(M, currentRow+1, maxRow, minHere, maxCol, Tout, Dout);
}
/* If item is the first cdf value with count at
* or above i, returns the cumulative sum up to the ith entry of the
* underlying sorted points list.
*/
KValue cumulativeAt(std::vector<struct CDF>::iterator item, int i)
{
KValue sumBelow = 0.0;
int numPointsBelow = 0;
if (item != cdf.begin())
{
sumBelow = (item-1)->sumToHere;
numPointsBelow = (item-1)->pointsSoFar;
}
return sumBelow + item->thisValue*(i - numPointsBelow);
}
/* Grønlund et al.'s CC function for the K-median problem.
*
* CC(i,j) is the cost of grouping points_i...points_j into one cluster
* with the optimal cluster point (the median point). By programming
* convention, the interval is half-open and indexed from 0, unlike the
* paper's convention of closed intervals indexed from 1.
*
* Note: i and j are indices onto weighted_cdf. So e.g. if i = 0, j = 2 and
* weighted cdf is [[2, 0, 0], [4, 2, 1], [5, 2, 2]], then that is the cost
* of clustering all points between the one described by the zeroth weighted
* cdf entry, and up to (but not including) the last. In other words, it's
* CC([0, 0, 1, 1], 0, 5).
*/
KValue CC(int i, int j)
{
if (i >= j)
return 0;
int lowerCount = 0;
if (i > 0)
lowerCount = cdf[i-1].pointsSoFar;
int upperCount = cdf[j-1].pointsSoFar;
/* Note, this is not the index, but number of points start inclusive,
* which is why the -1 has to be turned into +1. */
KValue medianPoint = (lowerCount + upperCount + 1) / 2.0;
auto lowerMedian = std::lower_bound(cdf.begin(), cdf.end(), (int)medianPoint,
[&](const struct CDF& lhs, int rhs) {
return lhs.pointsSoFar < rhs;
}
);
KValue mu;
if (((int)ceil(medianPoint) == (int)floor(medianPoint))
|| ((int)floor(medianPoint) != lowerMedian->pointsSoFar))
mu = lowerMedian->thisValue;
else
mu = (lowerMedian->thisValue + (lowerMedian+1)->thisValue) / 2.0;
/* Lower part: entry i to median point between i and j, median excluded. */
int sumBelow = cumulativeAt(lowerMedian, (int)medianPoint);
if (i > 0)
sumBelow -= cdf[i-1].sumToHere;
/* Upper part: everything from the median up, including the median if it's a
* real point. */
int sumAbove = cdf[j-1].sumToHere - cumulativeAt(lowerMedian, (int)medianPoint);
return floor(medianPoint - lowerCount)*mu - sumBelow + sumAbove - ceil(upperCount - medianPoint)*mu;
}
/* D_previous is the D vector for (i-1) clusters, or empty if i < 2.
* It's possible to do this even faster (and more incomprehensibly).
* See Grønlund et al. for that. */
/* Calculate C_i[m][j] given D_previous = D[i-1]. p. 4 */
KValue C_i(int i, const std::vector<KValue>& D_previous, int m, int j)
{
KValue f;
if (i == 1)
f = CC(0, m);
else
f = D_previous[std::min(j, m)] + CC(j, m);
return f;
}
/* Calculates the optimal cluster centres for the points. */
std::vector<KValue> optimalKMedian(int numClusters)
{
std::vector<std::vector<int>> T(numClusters+1, std::vector<int>(uniquePoints+1, 0));
std::vector<std::vector<KValue>> D(numClusters+1, std::vector<KValue>(uniquePoints+1, 0.0));
for (int i=1; i<numClusters+1; i++)
{
/* Stop if we achieve optimal clustering with fewer clusters than the
* user asked for. */
std::vector<KValue>& lastD = D[i-1];
if ((i != 1) && (lastD.back() == 0.0))
continue;
std::function<KValue(int, int)> M = [&](int m, int j) {
return C_i(i, lastD, m, j);
};
monotoneMatrixIndices(M, i, uniquePoints+1, i-1, uniquePoints+1,
T[i], D[i]);
}
/* Backtrack. The last cluster has to encompass everything, so the
* rightmost boundary of the last cluster is the last point in the
* array, hence given by the last position of T. Then the previous
* cluster transition boundary is given by where that cluster starts,
* and so on.
*/
int currentClusteringRange = uniquePoints;
std::vector<KValue> centres;
for (int i=numClusters; i>0; i--)
{
int newClusteringRange = T[i][currentClusteringRange];
/* Reconstruct the cluster that's the median point between
* new_cluster_range and cur_clustering_range. */
centres.push_back(medianPoint(newClusteringRange, currentClusteringRange));
currentClusteringRange = newClusteringRange;
}
std::sort(centres.begin(), centres.end());
return centres;
}
};
static std::vector<KValue> getPointsFromFluxmap(const Fluxmap& fluxmap)
{
FluxmapReader fmr(fluxmap);
std::vector<KValue> points;
while (!fmr.eof())
{
KValue point = fmr.findEvent(F_BIT_PULSE);
points.push_back(point);
}
return points;
}
/* Analyses the fluxmap and determines the optimal cluster centres for it. The
* number of clusters is taken from the size of the output array. */
std::vector<KValue> optimalKMedian(const std::vector<KValue>& points, int clusters)
{
KCluster kcluster(points);
return kcluster.optimalKMedian(clusters);
}
std::vector<unsigned> optimalKMedian(const Fluxmap& fluxmap, int clusters)
{
std::vector<unsigned> ticks;
for (KValue t : optimalKMedian(getPointsFromFluxmap(fluxmap), clusters))
ticks.push_back(t);
return ticks;
}

10
lib/kmedian.h Normal file
View File

@@ -0,0 +1,10 @@
#ifndef KMEDIAN_H
#define KMEDIAN_H
typedef float KValue;
extern std::vector<KValue> optimalKMedian(const std::vector<KValue>& points, int clusters);
extern std::vector<unsigned> optimalKMedian(const Fluxmap& fluxmap, int clusters);
#endif

View File

@@ -195,10 +195,26 @@ void readDiskCommand(AbstractDecoder& decoder)
std::cout << fmt::format("{} records, {} sectors; ",
track->rawrecords.size(),
track->sectors.size());
if (track->sectors.size() > 0)
std::cout << fmt::format("{:.2f}us clock ({:.0f}kHz); ",
track->sectors.begin()->clock / 1000.0,
1000000.0 / track->sectors.begin()->clock);
const std::vector<nanoseconds_t>& intervals = track->intervals;
bool first = true;
for (nanoseconds_t i : intervals)
{
if (!first)
std::cout << "/";
first = false;
std::cout << fmt::format("{:.2f}", i / 1000.0);
}
std::cout << " us or ";
first = true;
for (nanoseconds_t i : intervals)
{
if (!first)
std::cout << "/";
first = false;
std::cout << fmt::format("{:.0f}", 1000000.0 / i);
}
std::cout << fmt::format(" kHz interval\n");
for (auto& sector : track->sectors)
{
@@ -251,8 +267,7 @@ void readDiskCommand(AbstractDecoder& decoder)
std::cout << "\nRaw (undecoded) records follow:\n\n";
for (auto& record : track->rawrecords)
{
std::cout << fmt::format("I+{:.2f}us with {:.2f}us clock\n",
record.position.ns() / 1000.0, record.clock / 1000.0);
std::cout << fmt::format("I+{:.2f}us\n", record.position.ns() / 1000.0);
hexdump(std::cout, record.data);
std::cout << std::endl;
}
@@ -263,9 +278,9 @@ void readDiskCommand(AbstractDecoder& decoder)
std::cout << "\nDecoded sectors follow:\n\n";
for (auto& sector : track->sectors)
{
std::cout << fmt::format("{}.{:02}.{:02}: I+{:.2f}us with {:.2f}us clock: status {}\n",
std::cout << fmt::format("{}.{:02}.{:02}: I+{:.2f}us: status {}\n",
sector.logicalTrack, sector.logicalSide, sector.logicalSector,
sector.position.ns() / 1000.0, sector.clock / 1000.0,
sector.position.ns() / 1000.0,
sector.status);
hexdump(std::cout, sector.data);
std::cout << std::endl;

View File

@@ -9,7 +9,7 @@ public:
RawRecord() {}
Fluxmap::Position position;
nanoseconds_t clock = 0;
std::vector<nanoseconds_t> intervals;
int physicalTrack = 0;
int physicalSide = 0;
Bytes data;

View File

@@ -26,7 +26,7 @@ public:
Status status = Status::INTERNAL_ERROR;
Fluxmap::Position position;
nanoseconds_t clock = 0;
std::vector<nanoseconds_t> intervals;
nanoseconds_t headerStartTime = 0;
nanoseconds_t headerEndTime = 0;
nanoseconds_t dataStartTime = 0;

View File

@@ -20,6 +20,7 @@ public:
unsigned physicalSide;
std::shared_ptr<FluxSource> fluxsource;
std::unique_ptr<Fluxmap> fluxmap;
std::vector<nanoseconds_t> intervals;
std::vector<RawRecord> rawrecords;
std::vector<Sector> sectors;

View File

@@ -192,6 +192,7 @@ buildlibrary libbackend.a \
lib/fluxsource/streamfluxsource.cc \
lib/globals.cc \
lib/hexdump.cc \
lib/kmedian.cc \
lib/ldbs.cc \
lib/reader.cc \
lib/sector.cc \
@@ -258,6 +259,7 @@ buildsimpleprogram brother240tool \
libemu.a \
libfmt.a \
runtest amiga-test tests/amiga.cc
runtest bitaccumulator-test tests/bitaccumulator.cc
runtest bytes-test tests/bytes.cc
runtest compression-test tests/compression.cc
@@ -265,6 +267,7 @@ runtest dataspec-test tests/dataspec.cc
runtest flags-test tests/flags.cc
runtest fluxpattern-test tests/fluxpattern.cc
runtest fmmfm-test tests/fmmfm.cc
runtest kmedian-test tests/kmedian.cc
runtest kryoflux-test tests/kryoflux.cc
runtest ldbs-test tests/ldbs.cc
runtest amiga-test tests/amiga.cc

View File

@@ -10,6 +10,7 @@
#include "sector.h"
#include "track.h"
#include "fmt/format.h"
#include "kmedian.h"
static FlagGroup flags { &readerFlags };
@@ -50,6 +51,11 @@ static DoubleFlag signalLevelFactor(
"Clock detection signal level (min + (max-min)*factor).",
0.05);
static IntFlag bands(
{ "--bands" },
"Number of bands to use for k-median interval classification.",
3);
void setDecoderManualClockRate(double clockrate_us)
{
manualClockRate.setDefaultValue(clockrate_us);
@@ -83,54 +89,6 @@ static nanoseconds_t guessClock(const Fluxmap& fluxmap)
uint32_t noise_floor = min + (max-min)*noiseFloorFactor;
uint32_t signal_level = min + (max-min)*signalLevelFactor;
/* Find a point solidly within the first pulse. */
int pulseindex = 0;
while (pulseindex < 256)
{
if (buckets[pulseindex] > signal_level)
break;
pulseindex++;
}
if (pulseindex == -1)
return 0;
/* Find the upper and lower bounds of the pulse. */
int peaklo = pulseindex;
while (peaklo > 0)
{
if (buckets[peaklo] < noise_floor)
break;
peaklo--;
}
int peakhi = pulseindex;
while (peakhi < 255)
{
if (buckets[peakhi] < noise_floor)
break;
peakhi++;
}
/* Find the total accumulated size of the pulse. */
uint32_t total_size = 0;
for (int i = peaklo; i < peakhi; i++)
total_size += buckets[i];
/* Now find the median. */
uint32_t count = 0;
int median = peaklo;
while (median < peakhi)
{
count += buckets[median];
if (count > (total_size/2))
break;
median++;
}
std::cout << "\nClock detection histogram:" << std::endl;
bool skipping = true;
@@ -166,16 +124,13 @@ static nanoseconds_t guessClock(const Fluxmap& fluxmap)
std::cout << fmt::format("Noise floor: {}", noise_floor) << std::endl;
std::cout << fmt::format("Signal level: {}", signal_level) << std::endl;
std::cout << fmt::format("Peak start: {} ({:.2f} us)", peaklo, peaklo*US_PER_TICK) << std::endl;
std::cout << fmt::format("Peak end: {} ({:.2f} us)", peakhi, peakhi*US_PER_TICK) << std::endl;
std::cout << fmt::format("Median: {} ({:.2f} us)", median, median*US_PER_TICK) << std::endl;
/*
* Okay, the median should now be a good candidate for the (or a) clock.
* How this maps onto the actual clock rate depends on the encoding.
*/
std::vector<unsigned> centres = optimalKMedian(fluxmap, bands);
for (int i=0; i<bands; i++)
std::cout << fmt::format("Band #{}: {} {:.2f} us\n",
i, centres[i], centres[i] * US_PER_TICK);
return median * NS_PER_TICK;
return centres[0] * NS_PER_TICK;
}
int mainInspect(int argc, const char* argv[])
@@ -198,7 +153,7 @@ int mainInspect(int argc, const char* argv[])
nanoseconds_t clockPeriod = guessClock(*track->fluxmap);
std::cout << fmt::format("{:.2f} us clock detected.", (double)clockPeriod/1000.0) << std::flush;
FluxmapReader fmr(*track->fluxmap);
FluxmapReader fmr(*track->fluxmap, bands, false);
fmr.seek(seekFlag*1000000.0);
if (dumpFluxFlag)
@@ -270,7 +225,7 @@ int mainInspect(int argc, const char* argv[])
{
if (fmr.eof())
break;
bool b = fmr.readRawBit(clockPeriod);
bool b = fmr.readRawBit();
std::cout << (b ? 'X' : '-');
}

43
tests/betterassert.h Normal file
View File

@@ -0,0 +1,43 @@
#ifndef BETTERASSERT_H
#define BETTERASSERT_H
#include <assert.h>
namespace std {
template <class T>
static std::string to_string(const std::vector<T>& vector)
{
std::stringstream s;
s << "vector{";
bool first = true;
for (const T& t : vector)
{
if (!first)
s << ", ";
first = false;
s << t;
}
s << "}";
return s.str();
}
}
#undef assert
#define assertEquals(got, expected) assertImpl(__FILE__, __LINE__, got, expected)
template <class T>
static void assertImpl(const char filename[], int linenumber, T got, T expected)
{
if (got != expected)
{
std::cerr << "assertion failure at "
<< filename << ":" << linenumber
<< ": got " << std::to_string(got)
<< ", expected " << std::to_string(expected)
<< std::endl;
abort();
}
}
#endif

View File

@@ -2,84 +2,49 @@
#include "flags.h"
#include "fluxmap.h"
#include "decoders/fluxmapreader.h"
#include "betterassert.h"
#include <sstream>
FlagGroup flags { &fluxmapReaderFlags };
typedef std::vector<unsigned> ivector;
namespace std {
template <class T>
static std::string to_string(const std::vector<T>& vector)
{
std::stringstream s;
s << "vector{";
bool first = true;
for (const T& t : vector)
{
if (!first)
s << ", ";
first = false;
s << t;
}
s << "}";
return s.str();
}
}
#undef assert
#define assert(got, expected) assertImpl(__FILE__, __LINE__, got, expected)
template <class T>
static void assertImpl(const char filename[], int linenumber, T got, T expected)
{
if (got != expected)
{
std::cerr << "assertion failure at "
<< filename << ":" << linenumber
<< ": got " << std::to_string(got)
<< ", expected " << std::to_string(expected)
<< std::endl;
abort();
}
}
void test_patternconstruction()
{
{
FluxPattern fp(16, 0x0003);
assert(fp._bits, 16U);
assert(fp._intervals, ivector{ 1 });
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, ivector{ 1 });
}
{
FluxPattern fp(16, 0xc000);
assert(fp._bits, 16U);
assert(fp._intervals, (ivector{ 1, 15 }));
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, (ivector{ 1, 15 }));
}
{
FluxPattern fp(16, 0x0050);
assert(fp._bits, 16U);
assert(fp._intervals, (ivector{ 2, 5 }));
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, (ivector{ 2, 5 }));
}
{
FluxPattern fp(16, 0x0070);
assert(fp._bits, 16U);
assert(fp._intervals, (ivector{ 1, 1, 5 }));
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, (ivector{ 1, 1, 5 }));
}
{
FluxPattern fp(16, 0x0070);
assert(fp._bits, 16U);
assert(fp._intervals, (ivector{ 1, 1, 5 }));
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, (ivector{ 1, 1, 5 }));
}
{
FluxPattern fp(16, 0x0110);
assert(fp._bits, 16U);
assert(fp._intervals, (ivector{ 4, 5 }));
assertEquals(fp._bits, 16U);
assertEquals(fp._intervals, (ivector{ 4, 5 }));
}
}
@@ -92,16 +57,16 @@ void test_patternmatchingwithouttrailingzeros()
const unsigned closematch2[] = { 110, 110, 220, 110 };
FluxMatch match;
assert(fp.matches(&matching[4], match), true);
assert(match.intervals, 2U);
assertEquals(fp.matches(&matching[4], match), true);
assertEquals(match.intervals, 2U);
assert(fp.matches(&notmatching[4], match), false);
assertEquals(fp.matches(&notmatching[4], match), false);
assert(fp.matches(&closematch1[4], match), true);
assert(match.intervals, 2U);
assertEquals(fp.matches(&closematch1[4], match), true);
assertEquals(match.intervals, 2U);
assert(fp.matches(&closematch2[4], match), true);
assert(match.intervals, 2U);
assertEquals(fp.matches(&closematch2[4], match), true);
assertEquals(match.intervals, 2U);
}
void test_patternmatchingwithtrailingzeros()
@@ -113,16 +78,16 @@ void test_patternmatchingwithtrailingzeros()
const unsigned closematch2[] = { 110, 110, 220, 110, 220 };
FluxMatch match;
assert(fp.matches(&matching[5], match), true);
assert(match.intervals, 3U);
assertEquals(fp.matches(&matching[5], match), true);
assertEquals(match.intervals, 3U);
assert(fp.matches(&notmatching[5], match), false);
assertEquals(fp.matches(&notmatching[5], match), false);
assert(fp.matches(&closematch1[5], match), true);
assert(match.intervals, 3U);
assertEquals(fp.matches(&closematch1[5], match), true);
assertEquals(match.intervals, 3U);
assert(fp.matches(&closematch2[5], match), true);
assert(match.intervals, 3U);
assertEquals(fp.matches(&closematch2[5], match), true);
assertEquals(match.intervals, 3U);
}
int main(int argc, const char* argv[])

27
tests/kmedian.cc Normal file
View File

@@ -0,0 +1,27 @@
#include "globals.h"
#include <string.h>
#include "bytes.h"
#include "fluxmap.h"
#include "kmedian.h"
#include "fmt/format.h"
#include "betterassert.h"
static void testOptimalKMedian()
{
std::vector<float> points = { 22, 36, 35, 22, 22, 23, 37, 22, 35, 47 };
std::vector<float> clusters = optimalKMedian(points, 3);
assertEquals(clusters,
(std::vector<float>{
22, 35.5, 47
})
);
}
int main(int argc, const char* argv[])
{
testOptimalKMedian();
return 0;
}