]> git.tdb.fi Git - poefilter.git/blob - scrape-item-data.py
Store item data in a list instead of a dict
[poefilter.git] / scrape-item-data.py
1 #!/usr/bin/python3
2
3 import requests
4 import html.parser
5 import codecs
6
class Item:
        """Base record for one item scraped from an item-data table.

        Stores the item's name, kind, drop level and mods, plus a link per
        comparison axis ("attrs" and "mods") to an item recorded as its upgrade.
        """

        def __init__(self, name, kind):
                self.name, self.kind = name, kind
                self.droplevel = 0
                # Mod name -> numeric value.
                self.mods = {}
                # Upgrade item per comparison axis; None until one is recorded.
                self.upgrades = {"attrs":None, "mods":None}

        def is_compatible(self, other):
                """Return True when other is of the same kind as this item."""
                return other.kind==self.kind

        def compare_mods(self, other):
                """Compare the mods of this item against those of other.

                A mod missing on either side counts as 0. Returns -1 as soon as
                any mod is lower on this item, 1 when none is lower and at least
                one is higher, and 0 when every mod is equal.
                """
                # Own mods first, then those that only the other item has.
                names = list(self.mods)
                names.extend(n for n in other.mods if n not in self.mods)

                any_higher = False
                for mod_name in names:
                        mine = self.mods.get(mod_name, 0)
                        theirs = other.mods.get(mod_name, 0)
                        if mine<theirs:
                                return -1
                        if mine>theirs:
                                any_higher = True

                return 1 if any_higher else 0

        def compare_attrs(self, other):
                """Compare base attributes; the base class has none, so always 0."""
                return 0

        def check_upgrade(self, other):
                """Record this item as an upgrade of other where it qualifies.

                Nothing happens unless this item has a strictly higher drop level
                and is compatible with other. An axis that already has an upgrade
                recorded is left untouched.
                """
                if self.droplevel<=other.droplevel or not self.is_compatible(other):
                        return

                mod_cmp = self.compare_mods(other)
                attr_cmp = self.compare_attrs(other)

                # Each axis is won outright, or tied while the other axis is won.
                axes = (("mods", mod_cmp, attr_cmp), ("attrs", attr_cmp, mod_cmp))
                for axis, primary, secondary in axes:
                        if other.upgrades[axis]:
                                continue
                        if primary>0 or (primary==0 and secondary>0):
                                other.upgrades[axis] = self
54
class Armor(Item):
        """Armour piece carrying armor, evasion and energy shield values."""

        def __init__(self, name, kind):
                super().__init__(name, kind)

                self.armor = self.evasion = self.energy_shield = 0

        def is_compatible(self, other):
                """Return True when both items have the same set of non-zero
                defence stats and are of the same kind."""
                for stat in ("armor", "evasion", "energy_shield"):
                        if (getattr(self, stat)!=0)!=(getattr(other, stat)!=0):
                                return False

                return super().is_compatible(other)

        def compare_attrs(self, other):
                """Compare defence stats against other.

                Returns -1 if any stat is lower on this item, otherwise 1 if any
                stat is higher, otherwise 0.
                """
                pairs = (
                        (self.armor, other.armor),
                        (self.evasion, other.evasion),
                        (self.energy_shield, other.energy_shield),
                )

                if any(mine<theirs for mine, theirs in pairs):
                        return -1
                if any(mine>theirs for mine, theirs in pairs):
                        return 1

                return 0
89
class Weapon(Item):
        """Weapon carrying an attack speed and a dps value."""

        def __init__(self, name, kind):
                super().__init__(name, kind)

                self.speed = 0
                self.dps = 0

        def compare_attrs(self, other):
                """Return -1, 0 or 1 as this weapon's dps is lower than, equal to
                or higher than other's."""
                mine, theirs = self.dps, other.dps
                return (mine>theirs)-(mine<theirs)
104
class ItemDataParser(html.parser.HTMLParser):
        """Base HTML parser that collects items from an item-data page.

        The text of each <h1> is recorded as a heading and used as the kind of
        the items that follow. Inside a <table class="itemDataTable">, a regular
        row creates an item (column 2 is the name, column 3 the drop level,
        columns 4 and up go to handle_value). A row whose class ends with "_mod"
        supplies mods for the most recently created item: column 1 lists mod
        names and column 2 the matching values.

        Subclasses override create_item and handle_value.
        """

        def __init__(self):
                super().__init__()

                # True once a <table class="itemDataTable"> has been opened.
                self.in_items_table = False
                # 1-based index of the current <td> within its row.
                self.column = 0
                self.in_cell = False
                # Every item created so far, in document order.
                self.items = []
                # Item that the current row (or a following mod row) applies to.
                self.current_item = None
                self.in_heading = False
                # Text of every <h1> seen so far.
                self.headings = []
                self.current_heading = None
                # True while inside a row whose class ends with "_mod".
                self.mod_row = False
                # Mod names read from column 1 of the current mod row.
                self.mod_names = []
                # Index into mod_names for the next value read from column 2.
                self.mod_index = 0

        def handle_starttag(self, tag, attrs):
                """Track entry into item tables, rows, cells and headings."""
                if tag=="table":
                        if any(n=="class" and v=="itemDataTable" for n, v in attrs):
                                self.in_items_table = True
                elif tag=="tr":
                        # HTMLParser reports a valueless attribute (<tr class>) with
                        # a value of None, so guard before calling str methods.
                        self.mod_row = any(
                                n=="class" and v is not None and v.endswith("_mod")
                                for n, v in attrs)
                        self.column = 0
                        # A mod row keeps pointing at the item of the previous row.
                        if not self.mod_row:
                                self.current_item = None
                        self.mod_names = []
                elif tag=="td":
                        self.column += 1
                        self.in_cell = True
                elif tag=="h1":
                        self.in_heading = True

        def handle_endtag(self, tag):
                """Track exit from tables, cells and headings; at the end of a mod
                row, offer the current item as an upgrade to every known item."""
                if tag=="table":
                        self.in_items_table = False
                elif tag=="td":
                        self.in_cell = False
                elif tag=="tr":
                        # The item is complete only once its mod row has been read.
                        if self.current_item and self.mod_row:
                                for it in self.items:
                                        self.current_item.check_upgrade(it)
                elif tag=="h1":
                        self.in_heading = False

        def handle_data(self, data):
                """Route non-blank text to the heading list or the current item."""
                data = data.strip()
                if not data:
                        return

                if self.in_heading:
                        self.current_heading = data
                        self.headings.append(self.current_heading)
                elif self.in_items_table and self.in_cell:
                        if self.mod_row:
                                if self.column==1:
                                        self.mod_names.append(data)
                                        self.mod_index = 0
                                elif self.column==2:
                                        # For a range "X to Y" keep the part after " to ".
                                        if " to " in data:
                                                value = int(data.split(" to ", 1)[1])
                                        else:
                                                value = int(data)
                                        name = self.mod_names[self.mod_index]
                                        # This one mod is deliberately not recorded.
                                        if name!="From Armour Movement Speed +%":
                                                self.current_item.mods[name] = value
                                        self.mod_index += 1
                        else:
                                if self.column==2:
                                        self.current_item = self.create_item(data, self.current_heading)
                                        self.items.append(self.current_item)
                                elif self.column==3:
                                        self.current_item.droplevel = int(data)
                                elif self.column>=4:
                                        self.handle_value(self.column, data)

        def create_item(self, name, kind):
                """Hook: return a new item for the given name and kind.

                The base implementation returns None; subclasses override it.
                """
                return None

        def handle_value(self, column, data):
                """Hook: store the text of a cell in column 4 or later on
                self.current_item. The base implementation does nothing."""
                pass
189
class ArmorDataParser(ItemDataParser):
        """Item-data parser that produces Armor items."""

        # Table column -> Armor attribute that receives its integer value.
        _STAT_COLUMNS = {4: "armor", 5: "evasion", 6: "energy_shield"}

        def create_item(self, name, kind):
                """Return a new Armor with the given name and kind."""
                return Armor(name, kind)

        def handle_value(self, column, data):
                """Store columns 4, 5 and 6 as armor, evasion and energy shield;
                other columns are ignored."""
                stat = self._STAT_COLUMNS.get(column)
                if stat is not None:
                        setattr(self.current_item, stat, int(data))
201
class WeaponDataParser(ItemDataParser):
        """Item-data parser that produces Weapon items."""

        def create_item(self, name, kind):
                """Return a new Weapon with the given name and kind."""
                return Weapon(name, kind)

        def handle_value(self, column, data):
                """Store column 5 as speed and column 6 as dps; other columns are
                ignored."""
                if column not in (5, 6):
                        return

                value = float(data)
                if column==5:
                        self.current_item.speed = value
                else:
                        self.current_item.dps = value
211
def get_upgrade_level(item, steps):
        """Return the highest drop level reached by following `steps` upgrade
        links from item along each upgrade axis.

        Returns 0 as soon as any axis runs out of upgrades before `steps` links
        have been followed. The result is never below 0.
        """
        reached = [0]
        for axis in item.upgrades:
                node = item
                for _ in range(steps):
                        node = node.upgrades.get(axis)
                        if not node:
                                # A chain that ends early makes the whole result 0.
                                return 0
                reached.append(node.droplevel)
        return max(reached)
222
def write_best_category(out, prefix, items, steps):
        """Write a "<prefix>.best_at_level" (steps 1) or
        "<prefix>.second_at_level" (steps 2) category to out.

        Each item contributes an `and` clause with its base type and either an
        item-level range ending one below the level returned by
        get_upgrade_level, or only a minimum level when that level is 0.
        """
        rank = ("best", "second")[steps-1]

        out.write('category "{}.{}_at_level"\n'.format(prefix, rank) + "{\n\tor\n\t{\n")
        for entry in items:
                ceiling = get_upgrade_level(entry, steps)
                if ceiling:
                        level_rule = "\t\t\titem_level {} {};\n".format(entry.droplevel, ceiling-1)
                else:
                        level_rule = "\t\t\tmin_item_level {};\n".format(entry.droplevel)
                out.write("".join((
                        "\t\tand\n\t\t{\n",
                        '\t\t\tbase_type "{}";\n'.format(entry.name),
                        level_rule,
                        "\t\t};\n",
                )))
        out.write("\t};\n};\n")
239
def _fetch_item_data(url, parser):
        """Download url, feed the decoded page into parser and return parser."""
        response = requests.get(url)
        # Fail loudly instead of generating category files from an error page.
        response.raise_for_status()
        # response.encoding can be None when the server declares no charset;
        # codecs.decode would then raise TypeError, so fall back to UTF-8.
        parser.feed(codecs.decode(response.content, response.encoding or "utf-8"))
        return parser

def _write_name_category(out, category, rule, names):
        """Write an `or` category holding one `<rule> "<name>";` line per name."""
        out.write('category "{}"\n'.format(category))
        out.write("{\n\tor\n\t{\n")
        for name in names:
                out.write('\t\t{} "{}";\n'.format(rule, name))
        out.write("\t};\n};\n")

def main():
        """Scrape the armour and weapon item-data pages and write the category
        definitions to armor.txt and weapons.txt in the current directory."""
        armor = _fetch_item_data("https://www.pathofexile.com/item-data/armour", ArmorDataParser())

        # Armour type name -> predicate on which defence stats are non-zero.
        armor_types = {"robe": lambda i: (not i.armor and not i.evasion and i.energy_shield),
                "cloth": lambda i: (not i.armor and i.evasion and i.energy_shield),
                "leather": lambda i: (not i.armor and i.evasion and not i.energy_shield),
                "scale": lambda i: (i.armor and i.evasion and not i.energy_shield),
                "plate": lambda i: (i.armor and not i.evasion and not i.energy_shield),
                "chain": lambda i: (i.armor and not i.evasion and i.energy_shield)}

        # The with-block guarantees the file is flushed and closed.
        with open("armor.txt", "w") as out:
                for type_name, matches in armor_types.items():
                        items = [i for i in armor.items if matches(i)]
                        prefix = "armor.{}".format(type_name)

                        for steps in (1, 2):
                                write_best_category(out, prefix, items, steps)

                        _write_name_category(out, prefix, "base_type", [it.name for it in items])

                _write_name_category(out, "armor", "class", armor.headings)

        weapons = _fetch_item_data("https://www.pathofexile.com/item-data/weapon", WeaponDataParser())

        with open("weapons.txt", "w") as out:
                for heading in weapons.headings:
                        items = [i for i in weapons.items if i.kind==heading]
                        prefix = "weapon.{}".format(heading.lower().replace(' ', '_'))

                        for steps in (1, 2):
                                write_best_category(out, prefix, items, steps)

                _write_name_category(out, "weapon", "class", weapons.headings)
304
# Run the scraper only when this file is executed as a script, not on import.
if __name__=="__main__":
        main()