cochran.c

   1 #include <stdio.h>
   2 #include <string.h>
   3 #include <unistd.h>
   4 #include <sys/types.h>
   5 #include <sys/stat.h>
   6 #include <fcntl.h>
   7
   8 #include "dive.h"
   9 #include "file.h"
  10
  11 /*
  12  * The Cochran file format is designed to be annoying to read. It's roughly:
  13  *
  14  * 0x00000: room for 65534 4-byte words, giving the starting offsets
  15  *   of the dives themselves.
  16  *
  17  * 0x3fff8: the size of the file + 1
  18  * 0x3fffc: 0 (high 32 bits of filesize? Bogus: the offsets into the file
  19  *   are 32-bit, so it can't be a large file anyway)
  20  *
  21  * 0x40000: "block 0": the decoding block. The first byte is some random
  22  *   value (0x46 in the files I have access to), the next 200+ bytes or so
  23  *   are the "scrambling array" that needs to be added into the file
  24  *   contents to make sense of them.
  25  *
  26  * The descrambling array seems to be of some random size which is likely
  27  * determinable from the array somehow; in the two test files I have, it
  28  * is 230 bytes and 234 bytes respectively.
  29  */
  30 static unsigned int partial_decode(unsigned int start, unsigned int end,
  31                 const unsigned char *decode, unsigned offset, unsigned mod,
  32                 const unsigned char *buf, unsigned int size, unsigned char *dst)
  33 {
  34         unsigned i, sum = 0;
  35
  36         for (i = start ; i < end; i++) {
  37                 unsigned char d = decode[offset++];
  38                 if (i >= size)
  39                         break;
  40                 if (offset == mod)
  41                         offset = 0;
  42                 d += buf[i];
  43                 if (dst)
  44                         dst[i] = d;
  45                 sum += d;
  46         }
  47         return sum;
  48 }
  49
  50 /*
  51  * The decode buffer size can be figured out by simply trying our the
  52  * decode: we expect that the scrambled contents are largely random, and
  53  * thus tend to have half the bits set. Summing over the bytes is going
  54  * to give an average of 0x80 per byte.
  55  *
  56  * The decoded array is mostly full of zeroes, so the sum is lower.
  57  *
  58  * Works for me.
  59  */
  60 static int figure_out_modulus(const unsigned char *decode, const unsigned char *dive, unsigned int size)
  61 {
  62         int mod, best = -1;
  63         unsigned int min = ~0u;
  64
  65         if (size < 0x1000)
  66                 return best;
  67
  68         for (mod = 50; mod < 300; mod++) {
  69                 unsigned int sum;
  70
  71                 sum = partial_decode(0, 0x0fff, decode, 1, mod, dive, size, NULL);
  72                 if (sum < min) {
  73                         min = sum;
  74                         best = mod;
  75                 }
  76         }
  77         return best;
  78 }
  79
  80 #define hexchar(n) ("0123456789abcdef"[(n)&15])
  81
  82 static void show_line(unsigned offset, const unsigned char *data, unsigned size)
  83 {
  84         unsigned char bits;
  85         int i, off;
  86         char buffer[120];
  87
  88         if (size > 16)
  89                 size = 16;
  90
  91         bits = 0;
  92         memset(buffer, ' ', sizeof(buffer));
  93         off = sprintf(buffer, "%06x ", offset);
  94         for (i = 0; i < size; i++) {
  95                 char *hex = buffer + off + 3*i;
  96                 char *asc = buffer + off + 50 + i;
  97                 unsigned char byte = data[i];
  98
  99                 hex[0] = hexchar(byte>>4);
 100                 hex[1] = hexchar(byte);
 101                 bits |= byte;
 102                 if (byte < 32 || byte > 126)
 103                         byte = '.';
 104                 asc[0] = byte;
 105                 asc[1] = 0;
 106         }
 107
 108         if (bits)
 109                 puts(buffer);
 110 }
 111
 112 static void cochran_debug_write(const char *filename, int dive, const unsigned char *data, unsigned size)
 113 {
 114         int i;
 115         printf("\n%s, dive %d\n\n", filename, dive);
 116
 117         for (i = 0; i < size; i += 16) {
 118                 show_line(i, data + i, size - i);
 119         }
 120 }
 121
 122 static void parse_cochran_dive(const char *filename, int dive,
 123                 const unsigned char *decode, unsigned mod,
 124                 const unsigned char *in, unsigned size)
 125 {
 126         char *buf = malloc(size);
 127
 128         /*
 129          * The scrambling has odd boundaries. I think the boundaries
 130          * match some data structure size, but I don't know. They were
 131          * discovered the same way we dynamically discover the decode
 132          * size: automatically looking for least random output.
 133          *
 134          * The boundaries are also this confused "off-by-one" thing,
 135          * the same way the file size is off by one. It's as if the
 136          * cochran software forgot to write one byte at the beginning.
 137          */
 138         partial_decode(0     , 0x0fff, decode, 1, mod, in, size, buf);
 139         partial_decode(0x0fff, 0x1fff, decode, 0, mod, in, size, buf);
 140         partial_decode(0x1fff, 0x2fff, decode, 0, mod, in, size, buf);
 141         partial_decode(0x2fff, 0x48ff, decode, 0, mod, in, size, buf);
 142
 143         /*
 144          * This is not all the descrambling you need - the above are just
 145          * what appears to be the fixed-size blocks. The rest is also
 146          * scrambled, but there seems to be size differences in the data,
 147          * so this just descrambles part of it:
 148          */
 149         partial_decode(0x48ff, size, decode, 0, mod, in, size, buf);
 150
 151         cochran_debug_write(filename, dive, buf, size);
 152
 153         free(buf);
 154 }
 155
 156 int try_to_open_cochran(const char *filename, struct memblock *mem, GError **error)
 157 {
 158         unsigned int i;
 159         unsigned int mod;
 160         unsigned int *offsets, dive1, dive2;
 161         unsigned char *decode = mem->buffer + 0x40001;
 162
 163         if (mem->size < 0x40000)
 164                 return 0;
 165         offsets = mem->buffer;
 166         dive1 = offsets[0];
 167         dive2 = offsets[1];
 168         if (dive1 < 0x40000 || dive2 < dive1 || dive2 > mem->size)
 169                 return 0;
 170
 171         mod = figure_out_modulus(decode, mem->buffer + dive1, dive2 - dive1);
 172
 173         for (i = 0; i < 65534; i++) {
 174                 dive1 = offsets[i];
 175                 dive2 = offsets[i+1];
 176                 if (dive2 < dive1)
 177                         break;
 178                 if (dive2 > mem->size)
 179                         break;
 180                 parse_cochran_dive(filename, i, decode, mod, mem->buffer + dive1, dive2 - dive1);
 181         }
 182
 183         exit(0);
 184 }