cochran.c

   1 #include <stdio.h>
   2 #include <unistd.h>
   3 #include <sys/types.h>
   4 #include <sys/stat.h>
   5 #include <fcntl.h>
   6
   7 #include "dive.h"
   8 #include "file.h"
   9
  10 /*
  11  * The Cochran file format is designed to be annoying to read. It's roughly:
  12  *
  13  * 0x00000: room for 65534 4-byte words, giving the starting offsets
  14  *   of the dives themselves.
  15  *
  16  * 0x3fff8: the size of the file + 1
  17  * 0x3fffc: 0 (high 32 bits of filesize? Bogus: the offsets into the file
  18  *   are 32-bit, so it can't be a large file anyway)
  19  *
  20  * 0x40000: "block 0": the decoding block. The first byte is some random
  21  *   value (0x46 in the files I have access to), the next 200+ bytes or so
  22  *   are the "scrambling array" that needs to be added into the file
  23  *   contents to make sense of them.
  24  *
  25  * The descrambling array seems to be of some random size, which is likely
  26  * determinable from the array somehow; in the two test files I have, it is
  27  * 230 bytes and 234 bytes respectively.
  28  */
  29 static unsigned int partial_decode(unsigned int start, unsigned int end,
  30                 const unsigned char *decode, unsigned offset, unsigned mod,
  31                 const unsigned char *buf, unsigned int size, unsigned char *dst)
  32 {
  33         unsigned i, sum = 0;
  34
  35         for (i = start ; i < end; i++) {
  36                 unsigned char d = decode[offset++];
  37                 if (i >= size)
  38                         break;
  39                 if (offset == mod)
  40                         offset = 0;
  41                 d += buf[i];
  42                 if (dst)
  43                         dst[i] = d;
  44                 sum += d;
  45         }
  46         return sum;
  47 }
  48
  49 /*
  50  * The decode buffer size can be figured out by simply trying our the
  51  * decode: we expect that the scrambled contents are largely random, and
  52  * thus tend to have half the bits set. Summing over the bytes is going
  53  * to give an average of 0x80 per byte.
  54  *
  55  * The decoded array is mostly full of zeroes, so the sum is lower.
  56  *
  57  * Works for me.
  58  */
  59 static int figure_out_modulus(const unsigned char *decode, const unsigned char *dive, unsigned int size)
  60 {
  61         int mod, best = -1;
  62         unsigned int min = ~0u;
  63
  64         if (size < 0x1000)
  65                 return best;
  66
  67         for (mod = 50; mod < 300; mod++) {
  68                 unsigned int sum;
  69
  70                 sum = partial_decode(0, 0x0fff, decode, 1, mod, dive, size, NULL);
  71                 if (sum < min) {
  72                         min = sum;
  73                         best = mod;
  74                 }
  75         }
  76         return best;
  77 }
  78
  79 static void cochran_debug_write(int dive, const unsigned char *data, unsigned size)
  80 {
  81         char buffer[60];
  82         int fd;
  83
  84         snprintf(buffer, sizeof(buffer), "cochran.%d.out", dive);
  85         fd = open(buffer, O_CREAT | O_TRUNC | O_WRONLY, 0666);
  86         if (fd >= 0) {
  87                 write(fd, data, size);
  88                 close(fd);
  89         }
  90 }
  91
  92 static void parse_cochran_dive(int dive, const unsigned char *decode, unsigned mod,
  93                 const unsigned char *in, unsigned size)
  94 {
  95         char *buf = malloc(size);
  96
  97         /*
  98          * The scrambling has odd boundaries. I think the boundaries
  99          * match some data structure size, but I don't know. They were
 100          * discovered the same way we dynamically discover the decode
 101          * size: automatically looking for least random output.
 102          *
 103          * The boundaries are also this confused "off-by-one" thing,
 104          * the same way the file size is off by one. It's as if the
 105          * cochran software forgot to write one byte at the beginning.
 106          */
 107         partial_decode(0     , 0x0fff, decode, 1, mod, in, size, buf);
 108         partial_decode(0x0fff, 0x1fff, decode, 0, mod, in, size, buf);
 109         partial_decode(0x1fff, 0x2fff, decode, 0, mod, in, size, buf);
 110         partial_decode(0x2fff, 0x48ff, decode, 0, mod, in, size, buf);
 111
 112         /*
 113          * This is not all the descrambling you need - the above are just
 114          * what appears to be the fixed-size blocks. The rest is also
 115          * scrambled, but there seems to be size differences in the data,
 116          * so this just descrambles part of it:
 117          */
 118         partial_decode(0x48ff, size, decode, 0, mod, in, size, buf);
 119
 120         cochran_debug_write(dive, buf, size);
 121
 122         free(buf);
 123 }
 124
 125 int try_to_open_cochran(const char *filename, struct memblock *mem, GError **error)
 126 {
 127         unsigned int i;
 128         unsigned int mod;
 129         unsigned int *offsets, dive1, dive2;
 130         unsigned char *decode = mem->buffer + 0x40001;
 131
 132         if (mem->size < 0x40000)
 133                 return 0;
 134         offsets = mem->buffer;
 135         dive1 = offsets[0];
 136         dive2 = offsets[1];
 137         if (dive1 < 0x40000 || dive2 < dive1 || dive2 > mem->size)
 138                 return 0;
 139
 140         mod = figure_out_modulus(decode, mem->buffer + dive1, dive2 - dive1);
 141
 142         for (i = 0; i < 65534; i++) {
 143                 dive1 = offsets[i];
 144                 dive2 = offsets[i+1];
 145                 if (dive2 < dive1)
 146                         break;
 147                 if (dive2 > mem->size)
 148                         break;
 149                 parse_cochran_dive(i, decode, mod, mem->buffer + dive1, dive2 - dive1);
 150         }
 151
 152         return 1;
 153 }