From 323da8272a5f32bf0df19c6ed0e73aa62a17ede2 Mon Sep 17 00:00:00 2001
From: David Given <dg@cowlark.com>
Date: Fri, 21 Feb 2020 22:14:44 +0100
Subject: [PATCH] Hopefully add support for giving the reader a set of required
 sectors, so if one is missing then we can tell and the track can be reread.

---
 arch/ibm/ibm.h             | 10 ++++--
 arch/macintosh/decoder.cc  | 23 ++++++++++++++
 arch/macintosh/macintosh.h |  2 ++
 doc/disk-ibm.md            | 15 +++++++--
 lib/dataspec.cc            | 62 +++++++++++++++++++++++---------------
 lib/dataspec.h             | 32 ++++++++++++++++++++
 lib/decoders/decoders.cc   |  7 +++++
 lib/decoders/decoders.h    |  5 +++
 lib/reader.cc              | 12 ++++++--
 src/fe-readibm.cc          | 12 ++++++--
 tests/dataspec.cc          | 23 ++++++++++++++
 11 files changed, 167 insertions(+), 36 deletions(-)
diff --git a/arch/ibm/ibm.h b/arch/ibm/ibm.h
index 3b3f8739..d6dda0ff 100644
--- a/arch/ibm/ibm.h
+++ b/arch/ibm/ibm.h
@@ -32,18 +32,24 @@ struct IbmIdam
 class IbmDecoder : public AbstractDecoder
 {
 public:
-    IbmDecoder(unsigned sectorBase, bool ignoreSideByte=false):
+    IbmDecoder(unsigned sectorBase, bool ignoreSideByte=false,
+			const std::set<unsigned> requiredSectors=std::set<unsigned>()):
         _sectorBase(sectorBase),
-        _ignoreSideByte(ignoreSideByte)
+        _ignoreSideByte(ignoreSideByte),
+		_requiredSectors(requiredSectors)
     {}
 
     RecordType advanceToNextRecord();
     void decodeSectorRecord();
     void decodeDataRecord();
 
+	std::set<unsigned> requiredSectors(Track& track) const
+	{ return _requiredSectors; }
+
 private:
     unsigned _sectorBase;
     bool _ignoreSideByte;
+	std::set<unsigned> _requiredSectors;
     unsigned _currentSectorSize;
     unsigned _currentHeaderLength;
 };
diff --git a/arch/macintosh/decoder.cc b/arch/macintosh/decoder.cc
index 7b1e725d..711f975b 100644
--- a/arch/macintosh/decoder.cc
+++ b/arch/macintosh/decoder.cc
@@ -184,3 +184,26 @@ void MacintoshDecoder::decodeDataRecord()
     _sector->data.clear();
     _sector->data.writer().append(userData.slice(12, 512)).append(userData.slice(0, 12));
 }
+
+std::set<unsigned> MacintoshDecoder::requiredSectors(Track& track) const
+{
+	int count;
+	if (track.physicalTrack < 16)
+		count = 12;
+	else if (track.physicalTrack < 32)
+		count = 11;
+	else if (track.physicalTrack < 48)
+		count = 10;
+	else if (track.physicalTrack < 64)
+		count = 9;
+	else
+		count = 8;
+
+	std::set<unsigned> sectors;
+	do
+		sectors.insert(count);
+	while (count--);
+	return sectors;
+}
+
+
diff --git a/arch/macintosh/macintosh.h b/arch/macintosh/macintosh.h
index 64270b8c..231f7758 100644
--- a/arch/macintosh/macintosh.h
+++ b/arch/macintosh/macintosh.h
@@ -18,6 +18,8 @@ public:
     RecordType advanceToNextRecord();
     void decodeSectorRecord();
     void decodeDataRecord();
+
+	std::set<unsigned> requiredSectors(Track& track) const;
 };
 
 #endif
diff --git a/doc/disk-ibm.md b/doc/disk-ibm.md
index 3fa73f21..9aa93d51 100644
--- a/doc/disk-ibm.md
+++ b/doc/disk-ibm.md
@@ -41,19 +41,28 @@ of the disk image will vary depending on the format.
 
 Configuration options you'll want include:
 
-  - `--sector-id-base`: specifies the ID of the first sector; this defaults
-    to 1. Some formats (like the Acorn ones) start at 0. This can't be
+  - `--ibm-sector-id-base=N`: specifies the ID of the first sector; this defaults
+	to 1. Some formats (like the Acorn ones) start at 0. This can't be
 	autodetected because FluxEngine can't distinguish between a disk which
 	starts at sector 1 and a disk which starts at sector 0 but all the sector
 	0s are missing.
 
-  - `--ignore-side-byte`: each sector header describes the location of the
+  - `--ibm-ignore-side-byte=true|false`: each sector header describes the location of the
 	sector: sector ID, track and side. Some formats use the wrong side ID, so
 	the sectors on side 1 are labelled as belonging to side 0. This causes
 	FluxEngine to see duplicate sectors (as it can't distinguish between the
 	two sides). This option tells FluxEngine to ignore the side byte completely
 	and use the physical side instead.
 
+  - `--ibm-required-sectors=range`: if you know how many sectors to expect per
+	track, you can improve reads by telling FluxEngine what to expect here. If
+	a track is read and a sector on this list is _not_ present, then FluxEngine
+	assumes the read failed and will retry. This avoids the situation where
+	FluxEngine can't tell the difference between a sector missing because it's
+	bad or a sector missing because it was never written in the first place. If
+	sectors are seen outside the range here, it will still be read. You can use
+	the same syntax as for track specifiers: e.g. `0-9`, `0,1,2,3`, etc.
+
 
 Writing disks
 -------------
diff --git a/lib/dataspec.cc b/lib/dataspec.cc
index cbc04f3e..31ff9f64 100644
--- a/lib/dataspec.cc
+++ b/lib/dataspec.cc
@@ -15,34 +15,31 @@ std::vector<std::string> DataSpec::split(
 {
     std::vector<std::string> ret;
 
-    size_t start = 0;
-    size_t end = 0;
-    size_t len = 0;
-    do
-    {
-        end = s.find(delimiter,start); 
-        len = end - start;
-        std::string token = s.substr(start, len);
-        ret.emplace_back( token );
-        start += len + delimiter.length();
-    }
-    while (end != std::string::npos);
+	if (!s.empty())
+	{
+		size_t start = 0;
+		size_t end = 0;
+		size_t len = 0;
+		do
+		{
+			end = s.find(delimiter,start); 
+			len = end - start;
+			std::string token = s.substr(start, len);
+			ret.emplace_back( token );
+			start += len + delimiter.length();
+		}
+		while (end != std::string::npos);
+	}
+
     return ret;
 }
 
-DataSpec::Modifier DataSpec::parseMod(const std::string& spec)
+std::set<unsigned> DataSpec::parseRange(const std::string& data)
 {
-    static const std::regex MOD_REGEX("([a-z]*)=([-x+0-9,]*)");
-    static const std::regex DATA_REGEX("([0-9]+)(?:(?:-([0-9]+))|(?:\\+([0-9]+)))?(?:x([0-9]+))?");
+	static const std::regex DATA_REGEX("([0-9]+)(?:(?:-([0-9]+))|(?:\\+([0-9]+)))?(?:x([0-9]+))?");
 
-    std::smatch match;
-    if (!std::regex_match(spec, match, MOD_REGEX))
-        Error() << "invalid data modifier syntax '" << spec << "'";
-    
-    Modifier m;
-    m.name = match[1];
-    m.source = spec;
-    for (auto& data : split(match[2], ","))
+	std::set<unsigned> result;
+    for (auto& data : split(data, ","))
     {
         int start = 0;
         int count = 1;
@@ -64,9 +61,24 @@ DataSpec::Modifier DataSpec::parseMod(const std::string& spec)
             Error() << "mod '" << data << "' specifies an illegal quantity";
 
         for (int i = start; i < (start+count); i += step)
-            m.data.insert(i);
+            result.insert(i);
     }
 
+    return result;
+}
+
+DataSpec::Modifier DataSpec::parseMod(const std::string& spec)
+{
+	static const std::regex MOD_REGEX("([a-z]*)=([-x+0-9,]*)");
+
+    std::smatch match;
+    if (!std::regex_match(spec, match, MOD_REGEX))
+        Error() << "invalid data modifier syntax '" << spec << "'";
+    
+    Modifier m;
+    m.name = match[1];
+    m.source = spec;
+	m.data = parseRange(match[2]);
     return m;
 }
 
@@ -74,7 +86,7 @@ void DataSpec::set(const std::string& spec)
 {
     std::vector<std::string> words = split(spec, ":");
     if (words.size() == 0)
-        Error() << "empty data specification (you have to specify *something*)";
+		return;
 
     filename = words[0];
     if (words.size() > 1)
diff --git a/lib/dataspec.h b/lib/dataspec.h
index ff3aac34..e1e0e0d4 100644
--- a/lib/dataspec.h
+++ b/lib/dataspec.h
@@ -34,6 +34,8 @@ public:
 public:
     static std::vector<std::string> split(
         const std::string& s, const std::string& delimiter);
+	static std::set<unsigned> parseRange(const std::string& spec);
+
     static Modifier parseMod(const std::string& spec);
 
 public:
@@ -117,4 +119,34 @@ private:
     DataSpec _value;
 };
 
+class RangeFlag : public Flag
+{
+public:
+    RangeFlag(const std::vector<std::string>& names, const std::string helptext,
+            const std::string& defaultValue):
+        Flag(names, helptext),
+		_stringValue(defaultValue),
+        _value(DataSpec::parseRange(defaultValue))
+    {}
+
+    const std::set<unsigned>& get() const
+    { checkInitialised(); return _value; }
+
+    operator const std::set<unsigned>& () const
+    { return get(); }
+
+    bool hasArgument() const { return true; }
+    const std::string defaultValueAsString() const { return _stringValue; }
+
+    void set(const std::string& value)
+	{
+		_stringValue = value;
+		_value = DataSpec::parseRange(value);
+	}
+
+private:
+	std::string _stringValue;
+    std::set<unsigned> _value;
+};
+
 #endif
diff --git a/lib/decoders/decoders.cc b/lib/decoders/decoders.cc
index f5b1239e..fc107cde 100644
--- a/lib/decoders/decoders.cc
+++ b/lib/decoders/decoders.cc
@@ -88,3 +88,10 @@ void AbstractDecoder::pushRecord(const Fluxmap::Position& start, const Fluxmap::
     _track->rawrecords.push_back(record);
     _fmr->seek(here);
 }
+
+std::set<unsigned> AbstractDecoder::requiredSectors(Track& track) const
+{
+	static std::set<unsigned> empty;
+	return empty;
+}
+
diff --git a/lib/decoders/decoders.h b/lib/decoders/decoders.h
index fb9566af..672387d9 100644
--- a/lib/decoders/decoders.h
+++ b/lib/decoders/decoders.h
@@ -52,6 +52,11 @@ public:
     void seek(const Fluxmap::Position& pos)
     { return _fmr->seek(pos); } 
 
+	/* Returns a set of sectors required to exist on this track. If the reader
+	 * sees any missing, it will consider this to be an error and will retry
+	 * the read. */
+	virtual std::set<unsigned> requiredSectors(Track& track) const;
+
 protected:
     virtual void beginTrack() {};
     virtual RecordType advanceToNextRecord() = 0;
diff --git a/lib/reader.cc b/lib/reader.cc
index da0139fc..5b2f8de5 100644
--- a/lib/reader.cc
+++ b/lib/reader.cc
@@ -176,8 +176,6 @@ void readDiskCommand(AbstractDecoder& decoder)
 			decoder.decodeToSectors(*track);
 
 			std::cout << "       ";
-			if (!track->sectors.empty())
-			{
 				std::cout << fmt::format("{} records, {} sectors; ",
 					track->rawrecords.size(),
 					track->sectors.size());
@@ -193,9 +191,12 @@ void readDiskCommand(AbstractDecoder& decoder)
 				}
 
 				bool hasBadSectors = false;
+				std::set<unsigned> requiredSectors = decoder.requiredSectors(*track);
 				for (const auto& i : readSectors)
 				{
 					const auto& sector = i.second;
+					requiredSectors.erase(sector->logicalSector);
+
 					if (sector->status != Sector::OK)
 					{
 						std::cout << std::endl
@@ -204,6 +205,12 @@ void readDiskCommand(AbstractDecoder& decoder)
 						hasBadSectors = true;
 					}
 				}
+				for (unsigned logicalSector : requiredSectors)
+				{
+					std::cout << "\n"
+					          << "       Required sector " << logicalSector << " missing; ";
+					hasBadSectors = true;
+				}
 
 				if (hasBadSectors)
 					failures = false;
@@ -213,7 +220,6 @@ void readDiskCommand(AbstractDecoder& decoder)
 
 				if (!hasBadSectors)
 					break;
-			}
 
 			if (!track->fluxsource->retryable())
 				break;
diff --git a/src/fe-readibm.cc b/src/fe-readibm.cc
index 70530970..3a999420 100644
--- a/src/fe-readibm.cc
+++ b/src/fe-readibm.cc
@@ -7,28 +7,34 @@
 #include "sector.h"
 #include "sectorset.h"
 #include "record.h"
+#include "dataspec.h"
 #include "ibm/ibm.h"
 #include "fmt/format.h"
 
 static FlagGroup flags { &readerFlags };
 
 static IntFlag sectorIdBase(
-	{ "--sector-id-base" },
+	{ "--ibm-sector-id-base" },
 	"Sector ID of the first sector.",
 	1);
 
 static BoolFlag ignoreSideByte(
-	{ "--ignore-side-byte" },
+	{ "--ibm-ignore-side-byte" },
 	"Ignore the side byte in the sector ID, and use the physical side instead.",
 	false);
 
+static RangeFlag requiredSectors(
+	{ "--ibm-required-sectors" },
+	"A comma seperated list or range of sectors which must be on each track.",
+	"");
+
 int mainReadIBM(int argc, const char* argv[])
 {
 	setReaderDefaultSource(":t=0-79:s=0-1");
 	setReaderDefaultOutput("ibm.img");
     flags.parseFlags(argc, argv);
 
-	IbmDecoder decoder(sectorIdBase, ignoreSideByte);
+	IbmDecoder decoder(sectorIdBase, ignoreSideByte, requiredSectors);
 	readDiskCommand(decoder);
     return 0;
 }
diff --git a/tests/dataspec.cc b/tests/dataspec.cc
index 4c937cab..25cdd4cb 100644
--- a/tests/dataspec.cc
+++ b/tests/dataspec.cc
@@ -13,6 +13,28 @@ static void test_split(void)
         == std::vector<std::string>{"", "2", ""}));
     assert((DataSpec::split("2", ",")
         == std::vector<std::string>{"2"}));
+    assert((DataSpec::split("", ",")
+        == std::vector<std::string>()));
+}
+
+static void test_parserange(void)
+{
+    assert(DataSpec::parseRange("")
+        == std::set<unsigned>());
+    assert(DataSpec::parseRange("1")
+        == std::set<unsigned>({1}));
+    assert(DataSpec::parseRange("1,3,5")
+        == std::set<unsigned>({1, 3, 5}));
+    assert(DataSpec::parseRange("1,1,1")
+        == std::set<unsigned>({1}));
+    assert(DataSpec::parseRange("2-3")
+        == std::set<unsigned>({2, 3}));
+    assert(DataSpec::parseRange("2+3")
+        == std::set<unsigned>({2, 3, 4}));
+    assert(DataSpec::parseRange("2+3x2")
+        == std::set<unsigned>({2, 4}));
+    assert(DataSpec::parseRange("9,2+3x2")
+        == std::set<unsigned>({2, 4, 9}));
 }
 
 static void test_parsemod(void)
@@ -104,6 +126,7 @@ static void test_imagespec(void)
 int main(int argc, const char* argv[])
 {
     test_split();
+	test_parserange();
     test_parsemod();
     test_fluxspec();
     test_imagespec();