git.tdb.fi Git - poefilter.git/blobdiff - scrape-item-data.py
Correctly merge icon and light beam appearances
[poefilter.git] / scrape-item-data.py
index 3040063b75b5f32c1fac2021c16e2381dc174800..84def9232b6cf2ddaedc1e3353ed00e17d82f58a 100755 (executable)
@@ -3,16 +3,56 @@
 import requests
 import html.parser
 import codecs
+import argparse
 
 class Item:
        def __init__(self, name, kind):
                self.name = name
                self.kind = kind
                self.droplevel = 0
-               self.upgrade = None
-
-       def is_upgrade_for(self, other):
-               return self.kind==other.kind and self.droplevel>other.droplevel
+               self.mods = {}
+               self.upgrades = {"attrs":None, "mods":None}
+
+       def is_compatible(self, other):
+               return self.kind==other.kind
+
+       def compare_mods(self, other):
+               result = 0
+               for n, v in self.mods.items():
+                       ov = other.mods.get(n, 0)
+                       if v<ov:
+                               return -1
+                       elif v>ov:
+                               result = 1
+
+               for on, ov in other.mods.items():
+                       v = self.mods.get(on, 0)
+                       if v<ov:
+                               return -1
+                       elif v>ov:
+                               result = 1
+               
+               return result
+
+       def compare_attrs(self, other):
+               return 0
+
+       def check_upgrade(self, other):
+               if not self.is_compatible(other):
+                       return
+
+               mods = self.compare_mods(other)
+               attrs = self.compare_attrs(other)
+
+               if self.droplevel<other.droplevel and (mods<0 or attrs<0):
+                       return
+
+               if not other.upgrades["mods"]:
+                       if mods>0 or (mods==0 and attrs>0):
+                               other.upgrades["mods"] = self
+               if not other.upgrades["attrs"]:
+                       if attrs>0 or (attrs==0 and mods>0):
+                               other.upgrades["attrs"] = self
 
 class Armor(Item):
        def __init__(self, name, kind):
@@ -22,7 +62,7 @@ class Armor(Item):
                self.evasion = 0
                self.energy_shield = 0
 
-       def is_upgrade_for(self, other):
+       def is_compatible(self, other):
                if (self.armor!=0)!=(other.armor!=0):
                        return False
                if (self.evasion!=0)!=(other.evasion!=0):
@@ -30,7 +70,24 @@ class Armor(Item):
                if (self.energy_shield!=0)!=(other.energy_shield!=0):
                        return False
 
-               return super(Armor, self).is_upgrade_for(other)
+               return super(Armor, self).is_compatible(other)
+
+       def compare_attrs(self, other):
+               if self.armor<other.armor:
+                       return -1
+               if self.evasion<other.evasion:
+                       return -1
+               if self.energy_shield<other.energy_shield:
+                       return -1
+
+               if self.armor>other.armor:
+                       return 1
+               if self.evasion>other.evasion:
+                       return 1
+               if self.energy_shield>other.energy_shield:
+                       return 1
+
+               return 0
 
 class Weapon(Item):
        def __init__(self, name, kind):
@@ -39,6 +96,14 @@ class Weapon(Item):
                self.speed = 0
                self.dps = 0
 
+       def compare_attrs(self, other):
+               if self.dps<other.dps:
+                       return -1
+               if self.dps>other.dps:
+                       return 1
+
+               return 0
+
 class ItemDataParser(html.parser.HTMLParser):
        def __init__(self):
                super(ItemDataParser, self).__init__()
@@ -46,12 +111,14 @@ class ItemDataParser(html.parser.HTMLParser):
                self.in_items_table = False
                self.column = 0
                self.in_cell = False
-               self.ignore_row = False
-               self.items = {}
+               self.items = []
                self.current_item = None
                self.in_heading = False
                self.headings = []
                self.current_heading = None
+               self.mod_row = False
+               self.mod_names = []
+               self.mod_index = 0
 
        def handle_starttag(self, tag, attrs):
                if tag=="table":
@@ -59,15 +126,18 @@ class ItemDataParser(html.parser.HTMLParser):
                                if n=="class" and v=="itemDataTable":
                                        self.in_items_table = True
                elif tag=="tr":
-                       self.ignore_row = False
+                       self.mod_row = False
                        for n, v in attrs:
                                if n=="class" and v.endswith("_mod"):
-                                       self.ignore_row = True
+                                       self.mod_row = True
                        self.column = 0
-                       self.current_item = None
+                       if not self.mod_row:
+                               self.current_item = None
+                       self.mod_names = []
                elif tag=="td":
                        self.column += 1
-                       self.in_cell = True
+                       if self.in_items_table:
+                               self.in_cell = True
                elif tag=="h1":
                        self.in_heading = True
 
@@ -77,27 +147,48 @@ class ItemDataParser(html.parser.HTMLParser):
                elif tag=="td":
                        self.in_cell = False
                elif tag=="tr":
-                       if self.current_item:
-                               for it in self.items.values():
-                                       if not it.upgrade and self.current_item.is_upgrade_for(it):
-                                               it.upgrade = self.current_item
-                                               break
+                       if self.current_item and self.mod_row:
+                               for it in self.items:
+                                       self.current_item.check_upgrade(it)
                elif tag=="h1":
                        self.in_heading = False
+               elif tag=="html":
+                       for i in range(1, len(self.items)):
+                               for j in range(i):
+                                       self.items[j].check_upgrade(self.items[i])
 
        def handle_data(self, data):
                data = data.strip()
+               if not data:
+                       return
+
                if self.in_heading:
                        self.current_heading = data
+                       if self.current_heading=="Staff":
+                               self.current_heading = "Stave"
                        self.headings.append(self.current_heading)
-               elif self.in_items_table and self.in_cell and data and not self.ignore_row:
-                       if self.column==2:
-                               self.current_item = self.create_item(data, self.current_heading)
-                               self.items[data] = self.current_item
-                       elif self.column==3:
-                               self.current_item.droplevel = int(data)
-                       elif self.column>=4:
-                               self.handle_value(self.column, data)
+               elif self.in_cell:
+                       if self.mod_row:
+                               if self.column==1:
+                                       self.mod_names.append(data)
+                                       self.mod_index = 0
+                               elif self.column==2:
+                                       if " to " in data:
+                                               value = int(data.split(" to ", 1)[1])
+                                       else:
+                                               value = int(data)
+                                       name = self.mod_names[self.mod_index]
+                                       if name!="From Armour Movement Speed +%":
+                                               self.current_item.mods[name] = value
+                                       self.mod_index += 1
+                       else:
+                               if self.column==2:
+                                       self.current_item = self.create_item(data, self.current_heading)
+                                       self.items.append(self.current_item)
+                               elif self.column==3:
+                                       self.current_item.droplevel = int(data)
+                               elif self.column>=4:
+                                       self.handle_value(self.column, data)
 
        def create_item(self, name, kind):
                pass
@@ -127,32 +218,87 @@ class WeaponDataParser(ItemDataParser):
                elif column==6:
                        self.current_item.dps = float(data)
 
+def get_upgrade_level(item, steps):
+       level = 0
+       for p in item.upgrades.keys():
+               upgrade = item
+               for i in range(steps):
+                       upgrade = upgrade.upgrades.get(p)
+                       if not upgrade:
+                               return 0
+               level = max(level, upgrade.droplevel)
+       return level
+
 def write_best_category(out, prefix, items, steps):
        best = ["best", "second"]
 
        out.write('category "{}.{}_at_level"\n'.format(prefix, best[steps-1]))
        out.write("{\n\tor\n\t{\n")
        for it in items:
-               upgrade = it
-               for k in range(steps):
-                       upgrade = upgrade.upgrade
-                       if not upgrade:
-                               break
+               upgrade_level = get_upgrade_level(it, steps)
                out.write("\t\tand\n\t\t{\n")
-               if upgrade:
+               if upgrade_level:
                        out.write('\t\t\tbase_type "{}";\n'.format(it.name))
-                       out.write("\t\t\titem_level {} {};\n".format(it.droplevel, upgrade.droplevel-1))
+                       out.write("\t\t\titem_level {} {};\n".format(it.droplevel, upgrade_level-1))
                else:
                        out.write('\t\t\tbase_type "{}";\n'.format(it.name))
                        out.write("\t\t\tmin_item_level {};\n".format(it.droplevel))
                out.write("\t\t};\n")
        out.write("\t};\n};\n")
 
+def print_debug(items):
+       upgrades = set()
+       for it in items:
+               for u in it.upgrades.values():
+                       if u:
+                               upgrades.add(u.name)
+
+       printed = set()
+
+       queue = []
+       while 1:
+               for it in items:
+                       if it.name not in upgrades and it.name not in printed:
+                               queue.append(it)
+                               break
+
+               if not queue:
+                       break
+
+               while queue:
+                       it = queue.pop(0)
+                       if it.name in printed:
+                               continue
+                       printed.add(it.name)
+
+                       print(it.name)
+                       print("  level: {}".format(it.droplevel))
+                       if isinstance(it, Armor):
+                               print("  armor: {}".format(it.armor))
+                               print("  evasion: {}".format(it.evasion))
+                               print("  energy shield: {}".format(it.energy_shield))
+                       elif isinstance(it, Weapon):
+                               print("  dps: {}".format(it.dps))
+                       for n, v in it.mods.items():
+                               print("  {}: {}".format(n, v))
+                       for t, u in it.upgrades.items():
+                               if u:
+                                       print("  upgrade {}: {}".format(t, u.name))
+                                       if u.name not in printed:
+                                               queue.insert(0, u)
+
 def main():
+       parser = argparse.ArgumentParser()
+       parser.add_argument("-g", "--debug", action="store_true", dest="debug")
+       args = parser.parse_args()
+
        r = requests.get("https://www.pathofexile.com/item-data/armour")
        p = ArmorDataParser()
        p.feed(codecs.decode(r.content, r.encoding))
 
+       if args.debug:
+               print_debug(p.items)
+
        types = {"robe": lambda i: (not i.armor and not i.evasion and i.energy_shield),
                "cloth": lambda i: (not i.armor and i.evasion and i.energy_shield),
                "leather": lambda i: (not i.armor and i.evasion and not i.energy_shield),
@@ -163,27 +309,10 @@ def main():
        out = open("armor.txt", "w")
 
        for t, f in types.items():
-               items = [i for i in p.items.values() if f(i)]
+               items = [i for i in p.items if f(i)]
 
                for i in range(1, 3):
                        write_best_category(out, "armor.{}".format(t), items, i)
-                       """out.write('category "armor.{}.{}_at_level"\n'.format(t, best[i-1]))
-                       out.write("{\n\tor\n\t{\n")
-                       for it in items:
-                               next_level = 0
-                               n = it.name
-                               for k in range(i):
-                                       n = p.upgrades.get(n, "")
-                               if n:
-                                       next_level = p.items[n].droplevel
-                               if next_level:
-                                       out.write("\t\tand\n\t\t{\n")
-                                       out.write('\t\t\tbase_type "{}";\n'.format(it.name))
-                                       out.write("\t\t\tmax_item_level {};\n".format(next_level-1))
-                                       out.write("\t\t};\n")
-                               else:
-                                       out.write('\t\tbase_type "{}";\n'.format(it.name))
-                       out.write("\t};\n};\n")"""
 
                out.write('category "armor.{}"\n'.format(t))
                out.write("{\n\tor\n\t{\n")
@@ -191,6 +320,13 @@ def main():
                        out.write('\t\tbase_type "{}";\n'.format(it.name))
                out.write("\t};\n};\n")
 
+       for b in ("best", "second"):
+               out.write('category "armor.{}_at_level"\n'.format(b))
+               out.write('{\n\tor\n\t{\n')
+               for t in types.keys():
+                       out.write('\t\tcategory "armor.{}.{}_at_level";\n'.format(t, b))
+               out.write("\t};\n};\n")
+
        out.write('category "armor"\n{\n\tor\n\t{\n')
        for h in p.headings:
                out.write('\t\tclass "{}";\n'.format(h))
@@ -200,14 +336,24 @@ def main():
        p = WeaponDataParser()
        p.feed(codecs.decode(r.content, r.encoding))
 
+       if args.debug:
+               print_debug(p.items)
+
        out = open("weapons.txt", "w")
 
        for h in p.headings:
-               items = [i for i in p.items.values() if i.kind==h]
+               items = [i for i in p.items if i.kind==h]
 
                for i in range(1, 3):
                        write_best_category(out, "weapon.{}".format(h.lower().replace(' ', '_')), items, i)
 
+       for b in ("best", "second"):
+               out.write('category "weapon.{}_at_level"\n'.format(b))
+               out.write('{\n\tor\n\t{\n')
+               for h in p.headings:
+                       out.write('\t\tcategory "weapon.{}.{}_at_level";\n'.format(h.lower().replace(' ', '_'), b))
+               out.write("\t};\n};\n")
+
        out.write('category "weapon"\n{\n\tor\n\t{\n')
        for h in p.headings:
                out.write('\t\tclass "{}";\n'.format(h))