cochran.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#include "dive.h"
#include "file.h"
  10
  11 #define DON
  12
  13 /*
  14  * The Cochran file format is designed to be annoying to read. It's roughly:
  15  *
  16  * 0x00000: room for 65534 4-byte words, giving the starting offsets
  17  *   of the dives themselves.
  18  *
  19  * 0x3fff8: the size of the file + 1
  20  * 0x3fffc: 0 (high 32 bits of filesize? Bogus: the offsets into the file
  21  *   are 32-bit, so it can't be a large file anyway)
  22  *
  23  * 0x40000: "block 0": the decoding block. The first byte is some random
  24  *   value (0x46 in the files I have access to), the next 200+ bytes or so
  25  *   are the "scrambling array" that needs to be added into the file
  26  *   contents to make sense of them.
  27  *
  28  * The descrambling array seems to be of some random size which is likely
  29  * determinable from the array somehow, the two test files I have it as
  30  * 230 bytes and 234 bytes respectively.
  31  */
  32 static unsigned int partial_decode(unsigned int start, unsigned int end,
  33                 const unsigned char *decode, unsigned offset, unsigned mod,
  34                 const unsigned char *buf, unsigned int size, unsigned char *dst)
  35 {
  36         unsigned i, sum = 0;
  37
  38         for (i = start ; i < end; i++) {
  39                 unsigned char d = decode[offset++];
  40                 if (i >= size)
  41                         break;
  42                 if (offset == mod)
  43                         offset = 0;
  44                 d += buf[i];
  45                 if (dst)
  46                         dst[i] = d;
  47                 sum += d;
  48         }
  49         return sum;
  50 }
  51
  52 /*
  53  * The decode buffer size can be figured out by simply trying our the
  54  * decode: we expect that the scrambled contents are largely random, and
  55  * thus tend to have half the bits set. Summing over the bytes is going
  56  * to give an average of 0x80 per byte.
  57  *
  58  * The decoded array is mostly full of zeroes, so the sum is lower.
  59  *
  60  * Works for me.
  61  */
  62 static int figure_out_modulus(const unsigned char *decode, const unsigned char *dive, unsigned int size)
  63 {
  64         int mod, best = -1;
  65         unsigned int min = ~0u;
  66
  67         if (size < 0x1000)
  68                 return best;
  69
  70         for (mod = 50; mod < 300; mod++) {
  71                 unsigned int sum;
  72
  73                 sum = partial_decode(0, 0x0fff, decode, 1, mod, dive, size, NULL);
  74                 if (sum < min) {
  75                         min = sum;
  76                         best = mod;
  77                 }
  78         }
  79         return best;
  80 }
  81
  82 #define hexchar(n) ("0123456789abcdef"[(n)&15])
  83
  84 static int show_line(unsigned offset, const unsigned char *data, unsigned size, int show_empty)
  85 {
  86         unsigned char bits;
  87         int i, off;
  88         char buffer[120];
  89
  90         if (size > 16)
  91                 size = 16;
  92
  93         bits = 0;
  94         memset(buffer, ' ', sizeof(buffer));
  95         off = sprintf(buffer, "%06x ", offset);
  96         for (i = 0; i < size; i++) {
  97                 char *hex = buffer + off + 3*i;
  98                 char *asc = buffer + off + 50 + i;
  99                 unsigned char byte = data[i];
 100
 101                 hex[0] = hexchar(byte>>4);
 102                 hex[1] = hexchar(byte);
 103                 bits |= byte;
 104                 if (byte < 32 || byte > 126)
 105                         byte = '.';
 106                 asc[0] = byte;
 107                 asc[1] = 0;
 108         }
 109
 110         if (bits) {
 111                 puts(buffer);
 112                 return 1;
 113         }
 114         if (show_empty)
 115                 puts("...");
 116         return 0;
 117 }
 118
 119 static void cochran_debug_write(const char *filename, const unsigned char *data, unsigned size)
 120 {
 121         int i, show = 1;
 122
 123         for (i = 0; i < size; i += 16)
 124                 show = show_line(i, data + i, size - i, show);
 125 }
 126
 127 static void parse_cochran_header(const char *filename,
 128                 const unsigned char *decode, unsigned mod,
 129                 const unsigned char *in, unsigned size)
 130 {
 131         char *buf = malloc(size);
 132
 133         /* Do the "null decode" using a one-byte decode array of '\0' */
 134         partial_decode(0    , 0x0b14, "", 0, 1, in, size, buf);
 135
 136         /*
 137          * The header scrambling is different form the dive
 138          * scrambling. Oh yay!
 139          */
 140         partial_decode(0x010e, 0x0b14, decode, 0, mod, in, size, buf);
 141         partial_decode(0x0b14, 0x1b14, decode, 0, mod, in, size, buf);
 142         partial_decode(0x1b14, 0x2b14, decode, 0, mod, in, size, buf);
 143         partial_decode(0x2b14, 0x3b14, decode, 0, mod, in, size, buf);
 144         partial_decode(0x3b14, 0x5414, decode, 0, mod, in, size, buf);
 145         partial_decode(0x5414,   size, decode, 0, mod, in, size, buf);
 146
 147         printf("\n%s, header\n\n", filename);
 148         cochran_debug_write(filename, buf, size);
 149
 150         free(buf);
 151 }
 152
 153 static void parse_cochran_dive(const char *filename, int dive,
 154                 const unsigned char *decode, unsigned mod,
 155                 const unsigned char *in, unsigned size)
 156 {
 157         char *buf = malloc(size);
 158 #ifdef DON
 159         unsigned int offset = 0x4a14;
 160 #else
 161         unsigned int offset = 0x4b14;
 162 #endif
 163
 164         /*
 165          * The scrambling has odd boundaries. I think the boundaries
 166          * match some data structure size, but I don't know. They were
 167          * discovered the same way we dynamically discover the decode
 168          * size: automatically looking for least random output.
 169          *
 170          * The boundaries are also this confused "off-by-one" thing,
 171          * the same way the file size is off by one. It's as if the
 172          * cochran software forgot to write one byte at the beginning.
 173          */
 174         partial_decode(0     , 0x0fff, decode, 1, mod, in, size, buf);
 175         partial_decode(0x0fff, 0x1fff, decode, 0, mod, in, size, buf);
 176         partial_decode(0x1fff, 0x2fff, decode, 0, mod, in, size, buf);
 177         partial_decode(0x2fff, 0x48ff, decode, 0, mod, in, size, buf);
 178
 179         /*
 180          * This is not all the descrambling you need - the above are just
 181          * what appears to be the fixed-size blocks. The rest is also
 182          * scrambled, but there seems to be size differences in the data,
 183          * so this just descrambles part of it:
 184          */
 185         partial_decode(0x48ff, offset, decode, 0, mod, in, size, buf);
 186         partial_decode(offset,   size, decode, 0, mod, in, size, buf);
 187
 188         printf("\n%s, dive %d\n\n", filename, dive);
 189         cochran_debug_write(filename, buf, size);
 190
 191         free(buf);
 192 }
 193
 194 int try_to_open_cochran(const char *filename, struct memblock *mem, GError **error)
 195 {
 196         unsigned int i;
 197         unsigned int mod;
 198         unsigned int *offsets, dive1, dive2;
 199         unsigned char *decode = mem->buffer + 0x40001;
 200
 201         if (mem->size < 0x40000)
 202                 return 0;
 203         offsets = mem->buffer;
 204         dive1 = offsets[0];
 205         dive2 = offsets[1];
 206         if (dive1 < 0x40000 || dive2 < dive1 || dive2 > mem->size)
 207                 return 0;
 208
 209         mod = figure_out_modulus(decode, mem->buffer + dive1, dive2 - dive1);
 210
 211         parse_cochran_header(filename, decode, mod, mem->buffer + 0x40000, dive1 - 0x40000);
 212
 213         for (i = 0; i < 65534; i++) {
 214                 dive1 = offsets[i];
 215                 dive2 = offsets[i+1];
 216                 if (dive2 < dive1)
 217                         break;
 218                 if (dive2 > mem->size)
 219                         break;
 220                 parse_cochran_dive(filename, i+1, decode, mod, mem->buffer + dive1, dive2 - dive1);
 221         }
 222
 223         exit(0);
 224 }