Monday, 17 November 2014

Finding character with most occurrence in a continuous stream.

Problem: Given a continuous character stream, at any stage find and remove (set its occurance count to 0) the character that has occured highest number of time.

Solution: The problem can be solved efficient using combination of hash map and linked list (or max heap). The below code is using hash map and linked list.

The information that should stored in hash map is reference to the node (linked list or max heap). The key in the hash map is the character. If problem is limited to the smaller set of alphabet then array can also be used in place of hash map.

Whenever a character is read from the stream, then reference (linked list or max heap) is retrieved from the hash map. If reference does not exist then a new node will be created and inserted in the linked list or hash map. Otherwise, frequency of character is updated in the node retrieved using reference from hash map.

After the frequency of node is updated, linked list must be worked upon to ensure sorted order is maintained ( O(n) operation). If max heap is used then maxHeapify operation must be run to ensure heap property is maintained.

For retrieving most occurred character return and remove head of linked list or root of the max heap.

 import java.io.BufferedInputStream;  
 import java.io.IOException;  
 import java.io.InputStream;  
 import java.util.HashMap;  
 import java.util.Map;  
 /**  
  * Given a continuous character stream, at any stage find and remove (set its  
  * occurance count to 0) the character that has occured highest number of time.  
  *   
  * @author sanahuja  
  *   
  */  
 public class DoublyList {  
      private static Node head = null;  
      private static Node tail = null;  
      private static Map<Character, Node> chars = new HashMap<Character, Node>();  
      public static void main(String[] args) throws IOException {  
           InputStream s = new BufferedInputStream(System.in);  
           while (true) {  
                char c = (char) s.read();  
                if (c == '\r' || c == '\t' || c == ' ' || c == '\n') {  
                     continue;  
                } else if (c == '-') {  
                     if (head == null) {  
                          System.out.println("No char yet.");  
                     } else {  
                          // find and remove highest frequency char  
                          System.out.println("Highest char " + head.c  
                                    + " with count " + head.frequency);  
                          chars.remove(head.c);  
                          if (head == tail) {  
                               head = null;  
                               tail = null;  
                          } else {  
                               Node t = head;  
                               head = t.next;  
                               t.next = null;  
                               head.prev = null;  
                          }  
                     }  
                } else if (c == '$') {  
                     break;  
                } else {  
                     // add the char to list  
                     if (chars.containsKey(c)) {  
                          // update existing  
                          Node n = chars.get(c);  
                          n.frequency++;  
                          while (n.prev != null && n.frequency > n.prev.frequency) {  
                               Node p = n.prev;  
                               if (p.prev != null) {  
                                    p.prev.next = n;  
                               }  
                               p.next = n.next;  
                               if (n.next != null) {  
                                    n.next.prev = p;  
                               }  
                               n.prev = p.prev;  
                               p.prev = n;  
                               n.next = p;  
                               if (tail == n) {  
                                    tail = p;  
                               }  
                          }  
                          if (n.prev == null) {  
                               head = n;  
                          }  
                     } else {  
                          // add new node  
                          Node n = new Node(c);  
                          if (tail == null) {  
                               tail = n;  
                          } else {  
                               tail.next = n;  
                               n.prev = tail;  
                               tail = n;  
                          }  
                          if (head == null)  
                               head = n;  
                          chars.put(c, n);  
                     }  
                }  
           }  
           s.close();  
      }  
 }  
 class Node {  
      public char c;  
      public int frequency;  
      Node next;  
      Node prev;  
      public Node(char c) {  
           this.c = c;  
           this.frequency = 1;  
           next = null;  
           prev = null;  
      }  
 }  

No comments:

Post a Comment