X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=scrape-item-data.py;h=84def9232b6cf2ddaedc1e3353ed00e17d82f58a;hb=8dcb2ed5219429c1c7d543c422a7471c18917f55;hp=71e573a2c96b74c76d2ca4812564cf91d2a6c6cc;hpb=8bde98fb78392e80922f9d86f5ee28565848dfad;p=poefilter.git diff --git a/scrape-item-data.py b/scrape-item-data.py index 71e573a..84def92 100755 --- a/scrape-item-data.py +++ b/scrape-item-data.py @@ -3,6 +3,7 @@ import requests import html.parser import codecs +import argparse class Item: def __init__(self, name, kind): @@ -37,14 +38,15 @@ class Item: return 0 def check_upgrade(self, other): - if self.droplevel<=other.droplevel: - return if not self.is_compatible(other): return mods = self.compare_mods(other) attrs = self.compare_attrs(other) + if self.droplevel0 or (mods==0 and attrs>0): other.upgrades["mods"] = self @@ -134,7 +136,8 @@ class ItemDataParser(html.parser.HTMLParser): self.mod_names = [] elif tag=="td": self.column += 1 - self.in_cell = True + if self.in_items_table: + self.in_cell = True elif tag=="h1": self.in_heading = True @@ -149,6 +152,10 @@ class ItemDataParser(html.parser.HTMLParser): self.current_item.check_upgrade(it) elif tag=="h1": self.in_heading = False + elif tag=="html": + for i in range(1, len(self.items)): + for j in range(i): + self.items[j].check_upgrade(self.items[i]) def handle_data(self, data): data = data.strip() @@ -157,8 +164,10 @@ class ItemDataParser(html.parser.HTMLParser): if self.in_heading: self.current_heading = data + if self.current_heading=="Staff": + self.current_heading = "Stave" self.headings.append(self.current_heading) - elif self.in_items_table and self.in_cell: + elif self.in_cell: if self.mod_row: if self.column==1: self.mod_names.append(data) @@ -237,11 +246,59 @@ def write_best_category(out, prefix, items, steps): out.write("\t\t};\n") out.write("\t};\n};\n") +def print_debug(items): + upgrades = set() + for it in items: + for u in it.upgrades.values(): + if u: + upgrades.add(u.name) + + printed = set() + + queue = [] + while 1: + for it in items: + if it.name not in upgrades and it.name not in printed: + queue.append(it) + break + + if not queue: + break + + while queue: + it = queue.pop(0) + if it.name in printed: + continue + printed.add(it.name) + + print(it.name) + print(" level: {}".format(it.droplevel)) + if isinstance(it, Armor): + print(" armor: {}".format(it.armor)) + print(" evasion: {}".format(it.evasion)) + print(" energy shield: {}".format(it.energy_shield)) + elif isinstance(it, Weapon): + print(" dps: {}".format(it.dps)) + for n, v in it.mods.items(): + print(" {}: {}".format(n, v)) + for t, u in it.upgrades.items(): + if u: + print(" upgrade {}: {}".format(t, u.name)) + if u.name not in printed: + queue.insert(0, u) + def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-g", "--debug", action="store_true", dest="debug") + args = parser.parse_args() + r = requests.get("https://www.pathofexile.com/item-data/armour") p = ArmorDataParser() p.feed(codecs.decode(r.content, r.encoding)) + if args.debug: + print_debug(p.items) + types = {"robe": lambda i: (not i.armor and not i.evasion and i.energy_shield), "cloth": lambda i: (not i.armor and i.evasion and i.energy_shield), "leather": lambda i: (not i.armor and i.evasion and not i.energy_shield), @@ -263,6 +320,13 @@ def main(): out.write('\t\tbase_type "{}";\n'.format(it.name)) out.write("\t};\n};\n") + for b in ("best", "second"): + out.write('category "armor.{}_at_level"\n'.format(b)) + out.write('{\n\tor\n\t{\n') + for t in types.keys(): + out.write('\t\tcategory "armor.{}.{}_at_level";\n'.format(t, b)) + out.write("\t};\n};\n") + out.write('category "armor"\n{\n\tor\n\t{\n') for h in p.headings: out.write('\t\tclass "{}";\n'.format(h)) @@ -272,6 +336,9 @@ def main(): p = WeaponDataParser() p.feed(codecs.decode(r.content, r.encoding)) + if args.debug: + print_debug(p.items) + out = open("weapons.txt", "w") for h in p.headings: @@ -280,6 +347,13 @@ def main(): for i in range(1, 3): write_best_category(out, "weapon.{}".format(h.lower().replace(' ', '_')), items, i) + for b in ("best", "second"): + out.write('category "weapon.{}_at_level"\n'.format(b)) + out.write('{\n\tor\n\t{\n') + for h in p.headings: + out.write('\t\tcategory "weapon.{}.{}_at_level";\n'.format(h.lower().replace(' ', '_'), b)) + out.write("\t};\n};\n") + out.write('category "weapon"\n{\n\tor\n\t{\n') for h in p.headings: out.write('\t\tclass "{}";\n'.format(h))