# Source: git.tdb.fi Git - poefilter.git/blob - scrape-item-data.py
# Commit: Created combined best/second_at_level categories
# Path: [poefilter.git] / scrape-item-data.py
1 #!/usr/bin/python3
2
3 import requests
4 import html.parser
5 import codecs
6 import argparse
7
class Item:
    """A scraped base item together with the upgrades found for it.

    Attributes:
        name: Name passed to the constructor.
        kind: Kind passed to the constructor; only items with equal kinds
            are compared against each other.
        droplevel: Starts at 0; compared between items in check_upgrade().
        mods: Mapping of mod name to numeric value.
        upgrades: Mapping with the keys "attrs" and "mods"; each value is
            either None or the first Item recorded as an upgrade.
    """

    def __init__(self, name, kind):
        self.name = name
        self.kind = kind
        self.droplevel = 0
        self.mods = {}
        self.upgrades = {"attrs": None, "mods": None}

    def is_compatible(self, other):
        """Return True if other is of the same kind as this item."""
        return self.kind == other.kind

    def compare_mods(self, other):
        """Compare the mod values of this item against those of other.

        A mod missing from either side counts as 0.

        Returns:
            -1 if any mod is lower on this item, 1 if none is lower and at
            least one is higher, 0 if all mods are equal.
        """
        mine, theirs = self.mods, other.mods
        any_higher = False
        for mod_name in mine.keys() | theirs.keys():
            own = mine.get(mod_name, 0)
            rival = theirs.get(mod_name, 0)
            if own < rival:
                return -1
            if own > rival:
                any_higher = True
        return 1 if any_higher else 0

    def compare_attrs(self, other):
        """Compare type-specific attributes; the base item has none."""
        return 0

    def check_upgrade(self, other):
        """Record this item as an upgrade of other where it qualifies.

        Nothing happens for incompatible items, or when this item has a
        lower droplevel than other while being worse in mods or attrs.
        An upgrade slot that is already filled is never overwritten.
        """
        if not self.is_compatible(other):
            return

        mod_cmp = self.compare_mods(other)
        attr_cmp = self.compare_attrs(other)

        worse_somewhere = mod_cmp < 0 or attr_cmp < 0
        if worse_somewhere and self.droplevel < other.droplevel:
            return

        slots = other.upgrades
        better_mods = mod_cmp > 0 or (mod_cmp == 0 and attr_cmp > 0)
        better_attrs = attr_cmp > 0 or (attr_cmp == 0 and mod_cmp > 0)
        if not slots["mods"] and better_mods:
            slots["mods"] = self
        if not slots["attrs"] and better_attrs:
            slots["attrs"] = self
56
class Armor(Item):
    """An item carrying armor, evasion and energy shield values."""

    def __init__(self, name, kind):
        super().__init__(name, kind)

        self.armor = 0
        self.evasion = 0
        self.energy_shield = 0

    def is_compatible(self, other):
        """Return True if other has the same non-zero stats and the same kind."""
        for stat in ("armor", "evasion", "energy_shield"):
            if (getattr(self, stat) != 0) != (getattr(other, stat) != 0):
                return False

        return super().is_compatible(other)

    def compare_attrs(self, other):
        """Compare the three defence values against those of other.

        Returns:
            -1 if any value is lower on this item, 1 if none is lower and
            at least one is higher, 0 if all three are equal.
        """
        pairs = [
            (self.armor, other.armor),
            (self.evasion, other.evasion),
            (self.energy_shield, other.energy_shield),
        ]
        if any(mine < theirs for mine, theirs in pairs):
            return -1
        if any(mine > theirs for mine, theirs in pairs):
            return 1
        return 0
91
class Weapon(Item):
    """An item carrying speed and dps values; only dps is compared."""

    def __init__(self, name, kind):
        super().__init__(name, kind)

        self.speed = 0
        self.dps = 0

    def compare_attrs(self, other):
        """Return -1, 0 or 1 as this weapon's dps is lower, equal or higher."""
        mine, theirs = self.dps, other.dps
        return (mine > theirs) - (mine < theirs)
106
class ItemDataParser(html.parser.HTMLParser):
    """Collect items and headings from an HTML page of item tables.

    Text inside <h1> elements is recorded as a heading.  Inside a table
    with class "itemDataTable", ordinary rows create items and rows whose
    class ends in "_mod" attach mods to the most recently created item.
    Subclasses supply create_item() and handle_value().
    """

    def __init__(self):
        super().__init__()

        # Where the parser currently is in the document.
        self.in_items_table = False
        self.in_cell = False
        self.in_heading = False
        self.column = 0
        self.mod_row = False

        # What has been collected so far.
        self.items = []
        self.headings = []
        self.current_item = None
        self.current_heading = None

        # Scratch state for the mod row being read.
        self.mod_names = []
        self.mod_index = 0

    def handle_starttag(self, tag, attrs):
        """Update the position state for an opening tag."""
        if tag == "table":
            if ("class", "itemDataTable") in attrs:
                self.in_items_table = True
        elif tag == "tr":
            class_values = [value for key, value in attrs if key == "class"]
            # A list, not a generator: every class value is inspected.
            self.mod_row = any([value.endswith("_mod") for value in class_values])
            self.column = 0
            if not self.mod_row:
                # An ordinary row starts a new item; a mod row keeps the last one.
                self.current_item = None
            self.mod_names = []
        elif tag == "td":
            self.column += 1
            self.in_cell = True
        elif tag == "h1":
            self.in_heading = True

    def handle_endtag(self, tag):
        """Update the position state for a closing tag and run upgrade checks."""
        if tag == "td":
            self.in_cell = False
        elif tag == "h1":
            self.in_heading = False
        elif tag == "table":
            self.in_items_table = False
        elif tag == "tr":
            self._finish_row()
        elif tag == "html":
            self._finish_document()

    def _finish_row(self):
        """After a mod row, test the current item as an upgrade of every item."""
        if self.current_item and self.mod_row:
            for candidate in self.items:
                self.current_item.check_upgrade(candidate)

    def _finish_document(self):
        """Test every item as an upgrade of each item listed after it."""
        for position, later in enumerate(self.items):
            for earlier in self.items[:position]:
                earlier.check_upgrade(later)

    def handle_data(self, data):
        """Route non-blank text to heading, mod-row or item-row handling."""
        text = data.strip()
        if not text:
            return

        if self.in_heading:
            heading = "Stave" if text == "Staff" else text
            self.current_heading = heading
            self.headings.append(heading)
            return

        if not (self.in_items_table and self.in_cell):
            return

        if self.mod_row:
            self._handle_mod_cell(text)
        else:
            self._handle_item_cell(text)

    def _handle_mod_cell(self, text):
        """Column 1 collects mod names; column 2 pairs values with them in order."""
        if self.column == 1:
            self.mod_names.append(text)
            self.mod_index = 0
        elif self.column == 2:
            # For a range "A to B" the part after the first " to " is used.
            _, separator, upper = text.partition(" to ")
            value = int(upper if separator else text)
            mod_name = self.mod_names[self.mod_index]
            if mod_name != "From Armour Movement Speed +%":
                self.current_item.mods[mod_name] = value
            self.mod_index += 1

    def _handle_item_cell(self, text):
        """Column 2 creates the item, column 3 is its droplevel, 4+ go to handle_value()."""
        if self.column == 2:
            self.current_item = self.create_item(text, self.current_heading)
            self.items.append(self.current_item)
        elif self.column == 3:
            self.current_item.droplevel = int(text)
        elif self.column >= 4:
            self.handle_value(self.column, text)

    def create_item(self, name, kind):
        """Hook for subclasses to build an item; the base version returns None."""

    def handle_value(self, column, data):
        """Hook for subclasses to consume columns 4 and up; a no-op here."""
197
class ArmorDataParser(ItemDataParser):
    """Parser that creates Armor items and reads their defence columns."""

    # Table column number -> Armor attribute that receives the integer value.
    _COLUMN_FIELDS = {4: "armor", 5: "evasion", 6: "energy_shield"}

    def create_item(self, name, kind):
        """Build an Armor for a table row."""
        return Armor(name, kind)

    def handle_value(self, column, data):
        """Store columns 4-6 on the current item as integers; ignore other columns."""
        field = self._COLUMN_FIELDS.get(column)
        if field is not None:
            setattr(self.current_item, field, int(data))
209
class WeaponDataParser(ItemDataParser):
    """Parser that creates Weapon items and reads their speed and dps columns."""

    # Table column number -> Weapon attribute that receives the float value.
    _COLUMN_FIELDS = {5: "speed", 6: "dps"}

    def create_item(self, name, kind):
        """Build a Weapon for a table row."""
        return Weapon(name, kind)

    def handle_value(self, column, data):
        """Store columns 5 and 6 on the current item as floats; ignore other columns."""
        field = self._COLUMN_FIELDS.get(column)
        if field is not None:
            setattr(self.current_item, field, float(data))
219
def get_upgrade_level(item, steps):
    """Return the highest droplevel reached by following each upgrade chain.

    For every key in item.upgrades the chain of upgrades under that key is
    followed for `steps` hops.  If any chain ends early, 0 is returned
    straight away; otherwise the result is the largest droplevel among the
    items reached, and never less than 0.
    """
    reached = []
    for path in item.upgrades:
        node = item
        for _ in range(steps):
            node = node.upgrades.get(path)
            if not node:
                return 0
        reached.append(node.droplevel)
    return max([0, *reached])
230
def write_best_category(out, prefix, items, steps):
    """Write a "<prefix>.best_at_level" or "<prefix>.second_at_level" category.

    steps selects the name (1 -> "best", 2 -> "second") and is passed to
    get_upgrade_level().  Each item gets an item_level range ending one
    below its upgrade level, or an open-ended min_item_level when
    get_upgrade_level() returns 0.
    """
    rank = ("best", "second")[steps - 1]

    out.write('category "{}.{}_at_level"\n'.format(prefix, rank))
    out.write("{\n\tor\n\t{\n")
    for item in items:
        ceiling = get_upgrade_level(item, steps)
        out.write("\t\tand\n\t\t{\n")
        out.write('\t\t\tbase_type "{}";\n'.format(item.name))
        if ceiling:
            out.write("\t\t\titem_level {} {};\n".format(item.droplevel, ceiling - 1))
        else:
            out.write("\t\t\tmin_item_level {};\n".format(item.droplevel))
        out.write("\t\t};\n")
    out.write("\t};\n};\n")
247
def print_debug(items):
    """Print every reachable item with its stats, mods and upgrades.

    Printing starts from items that are nobody's upgrade and follows the
    upgrade links depth-first, so each item is printed once and an upgrade
    appears right after the item it improves on.
    """
    upgrade_names = {
        target.name
        for it in items
        for target in it.upgrades.values()
        if target
    }

    printed = set()
    pending = []  # used as a stack: the most recently added item is printed next

    while True:
        # Pick the first unprinted item that is not an upgrade of anything.
        for it in items:
            if it.name not in upgrade_names and it.name not in printed:
                pending.append(it)
                break

        if not pending:
            break

        while pending:
            current = pending.pop()
            if current.name in printed:
                continue
            printed.add(current.name)

            print(current.name)
            print("  level: {}".format(current.droplevel))
            if isinstance(current, Armor):
                print("  armor: {}".format(current.armor))
                print("  evasion: {}".format(current.evasion))
                print("  energy shield: {}".format(current.energy_shield))
            elif isinstance(current, Weapon):
                print("  dps: {}".format(current.dps))
            for mod_name, mod_value in current.mods.items():
                print("  {}: {}".format(mod_name, mod_value))
            for label, target in current.upgrades.items():
                if not target:
                    continue
                print("  upgrade {}: {}".format(label, target.name))
                if target.name not in printed:
                    pending.append(target)
288
def _fetch_items(url, parser):
    """Download url, feed the decoded page to parser and return the parser.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            an error page is never parsed into empty output.
    """
    # requests has no default timeout; without one a stalled server hangs the script.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    # r.encoding is None when the response declares no charset.
    parser.feed(codecs.decode(r.content, r.encoding or "utf-8"))
    return parser


def _category_name(heading):
    """Turn a heading such as "Two Hand Sword" into "two_hand_sword"."""
    return heading.lower().replace(' ', '_')


def _write_armor(out, parsed):
    """Write the armor categories for the parsed armour page to out."""
    # Armor type name -> predicate over which of the three stats are non-zero.
    types = {"robe": lambda i: (not i.armor and not i.evasion and i.energy_shield),
        "cloth": lambda i: (not i.armor and i.evasion and i.energy_shield),
        "leather": lambda i: (not i.armor and i.evasion and not i.energy_shield),
        "scale": lambda i: (i.armor and i.evasion and not i.energy_shield),
        "plate": lambda i: (i.armor and not i.evasion and not i.energy_shield),
        "chain": lambda i: (i.armor and not i.evasion and i.energy_shield)}

    for t, matches in types.items():
        items = [i for i in parsed.items if matches(i)]

        for steps in range(1, 3):
            write_best_category(out, "armor.{}".format(t), items, steps)

        out.write('category "armor.{}"\n'.format(t))
        out.write("{\n\tor\n\t{\n")
        for it in items:
            out.write('\t\tbase_type "{}";\n'.format(it.name))
        out.write("\t};\n};\n")

    for b in ("best", "second"):
        out.write('category "armor.{}_at_level"\n'.format(b))
        out.write('{\n\tor\n\t{\n')
        for t in types:
            out.write('\t\tcategory "armor.{}.{}_at_level";\n'.format(t, b))
        out.write("\t};\n};\n")

    out.write('category "armor"\n{\n\tor\n\t{\n')
    for h in parsed.headings:
        out.write('\t\tclass "{}";\n'.format(h))
    out.write("\t};\n};\n")


def _write_weapons(out, parsed):
    """Write the weapon categories for the parsed weapon page to out."""
    for h in parsed.headings:
        items = [i for i in parsed.items if i.kind == h]

        for steps in range(1, 3):
            write_best_category(out, "weapon.{}".format(_category_name(h)), items, steps)

    for b in ("best", "second"):
        out.write('category "weapon.{}_at_level"\n'.format(b))
        out.write('{\n\tor\n\t{\n')
        for h in parsed.headings:
            out.write('\t\tcategory "weapon.{}.{}_at_level";\n'.format(_category_name(h), b))
        out.write("\t};\n};\n")

    out.write('category "weapon"\n{\n\tor\n\t{\n')
    for h in parsed.headings:
        out.write('\t\tclass "{}";\n'.format(h))
    out.write("\t};\n};\n")


def main():
    """Scrape the armour and weapon item data and write armor.txt and weapons.txt.

    With -g/--debug the parsed items are also printed to stdout.  Each
    output file is written inside a `with` block so it is flushed and
    closed even if writing fails part-way.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-g", "--debug", action="store_true", dest="debug")
    args = parser.parse_args()

    armor = _fetch_items("https://www.pathofexile.com/item-data/armour", ArmorDataParser())
    if args.debug:
        print_debug(armor.items)
    with open("armor.txt", "w") as out:
        _write_armor(out, armor)

    weapons = _fetch_items("https://www.pathofexile.com/item-data/weapon", WeaponDataParser())
    if args.debug:
        print_debug(weapons.items)
    with open("weapons.txt", "w") as out:
        _write_weapons(out, weapons)
360
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()