What is Quantization - Lightning AI

1.0Lightning AIhttps://lightning.ai/pagesJP Hennessyhttps://lightning.ai/pages/author/jplightning-ai/What is Quantization - Lightning AIrich600338<blockquote class="wp-embedded-content" data-secret="sNJrYNREqw"><a href="https://lightning.ai/pages/community/article/what-is-quantization/">What is Quantization</a></blockquote><iframe sandbox="allow-scripts" security="restricted" src="https://lightning.ai/pages/community/article/what-is-quantization/embed/#?secret=sNJrYNREqw" width="600" height="338" title="“What is Quantization” — Lightning AI" data-secret="sNJrYNREqw" frameborder="0" marginwidth="0" marginheight="0" scrolling="no" class="wp-embedded-content"></iframe><script type="text/javascript"> /* <![CDATA[ */ /*! This file is auto-generated */ !function(d,l){"use strict";l.querySelector&&d.addEventListener&&"undefined"!=typeof URL&&(d.wp=d.wp||{},d.wp.receiveEmbedMessage||(d.wp.receiveEmbedMessage=function(e){var t=e.data;if((t||t.secret||t.message||t.value)&&!/[^a-zA-Z0-9]/.test(t.secret)){for(var s,r,n,a=l.querySelectorAll('iframe[data-secret="'+t.secret+'"]'),o=l.querySelectorAll('blockquote[data-secret="'+t.secret+'"]'),c=new RegExp("^https?:$","i"),i=0;i<o.length;i++)o[i].style.display="none";for(i=0;i<a.length;i++)s=a[i],e.source===s.contentWindow&&(s.removeAttribute("style"),"height"===t.message?(1e3<(r=parseInt(t.value,10))?r=1e3:~~r<200&&(r=200),s.height=r):"link"===t.message&&(r=new URL(s.getAttribute("src")),n=new URL(t.value),c.test(n.protocol))&&n.host===r.host&&l.activeElement===s&&(d.top.location.href=t.value))}},d.addEventListener("message",d.wp.receiveEmbedMessage,!1),l.addEventListener("DOMContentLoaded",function(){for(var e,t,s=l.querySelectorAll("iframe.wp-embedded-content"),r=0;r<s.length;r++)(t=(e=s[r]).getAttribute("data-secret"))||(t=Math.random().toString(36).substring(2,12),e.src+="#?secret="+t,e.setAttribute("data-secret",t)),e.contentWindow.postMessage({message:"ready",secret:t},"*")},!1)))}(window,document); /* ]]> */ </script> https://lightningaidev.wpengine.com/wp-content/uploads/2023/10/What-is-Quantization-1.png12001200Introduction The aim of quantization is to reduce the memory usage of the model parameters by using lower precision types than your typical float32 or (b)float16. Using lower bit widths like 8-bit and 4-bit uses less memory compared to float32 (32-bit) and (b)float16 (16-bit). The quantization procedure does not simply trim the number of bits... Read more »